[alerter,manager] 告警信息入库,监控状态变更联动

This commit is contained in:
tomsun28
2021-12-10 16:56:39 +08:00
parent 5c49ed0335
commit 0694c8e36a
15 changed files with 247 additions and 27 deletions

View File

@@ -68,18 +68,24 @@ public class CalculateAlarm {
// 先判断采集响应数据状态 UN_REACHABLE/UN_CONNECTABLE 则需发最高级别告警
if (metricsData.getCode() != CollectRep.Code.SUCCESS) {
// 采集异常
if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE
|| metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) {
// 连接型可用性异常 UN_REACHABLE 对端不可达(网络层icmp) UN_CONNECTABLE 对端连接失败(传输层tcp,udp)
Alert alert = Alert.builder()
.monitorId(monitorId)
.priority((byte) 0)
.status((byte) 0)
.target(CommonConstants.AVAILABLE)
.duration(300)
.content("监控紧急可用性告警: " + metricsData.getCode().name())
.build();
dataQueue.addAlertData(alert);
Alert.AlertBuilder alertBuilder = Alert.builder()
.monitorId(monitorId)
.priority((byte) 0)
.status((byte) 0)
.duration(300);
if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE) {
// UN_REACHABLE 对端不可达(网络层icmp)
alertBuilder.target(CommonConstants.REACHABLE)
.content("监控紧急可达性告警: " + metricsData.getCode().name());
dataQueue.addAlertData(alertBuilder.build());
} else if (metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) {
// UN_CONNECTABLE 对端连接失败(传输层tcp,udp)
alertBuilder.target(CommonConstants.AVAILABLE)
.content("监控紧急可用性告警: " + metricsData.getCode().name());
dataQueue.addAlertData(alertBuilder.build());
} else {
// todo 其它规范异常 TIMEOUT ...
return;
}
return;
}
@@ -127,7 +133,7 @@ public class CalculateAlarm {
Expression expression = AviatorEvaluator.compile(expr, true);
Boolean match = (Boolean) expression.execute(fieldValueMap);
if (match) {
// 阈值规则匹配,触发告警 todo 告警延迟delay参数实现
// 阈值规则匹配,触发告警
Alert alert = Alert.builder()
.monitorId(monitorId)
.priority(define.getPriority())

View File

@@ -0,0 +1,14 @@
package com.usthe.alert.dao;
import com.usthe.alert.pojo.entity.Alert;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
/**
* Alert 数据库操作
* @author tom
* @date 2021/12/9 10:03
*/
public interface AlertDao extends JpaRepository<Alert, Long>, JpaSpecificationExecutor<Alert> {
}

View File

@@ -29,7 +29,7 @@ public interface AlertDefineDao extends JpaRepository<AlertDefine, Long>, JpaSpe
* @return 告警定义列表
*/
@Query("select define from AlertDefine define join AlertDefineBind bind on bind.alertDefineId = define.id " +
"where bind.monitorId = :monitorId and define.metric = :metrics")
"where bind.monitorId = :monitorId and define.metric = :metrics and define.enable = true")
List<AlertDefine> queryAlertDefinesByMonitor(@Param(value = "monitorId") Long monitorId,
@Param(value = "metrics") String metrics);
}

View File

@@ -61,8 +61,7 @@ public class Alert {
@ApiModelProperty(value = "告警目标对象: 监控可用性-available 指标-app.metrics.field",
example = "1", accessMode = READ_WRITE, position = 4)
@Min(0)
@Max(2)
@Length(max = 255)
private String target;
@ApiModelProperty(value = "触发告警后持续时间,单位s", example = "60", accessMode = READ_WRITE, position = 7)

View File

@@ -70,13 +70,9 @@ public class AlertDefine {
@Min(0)
private int duration;
@ApiModelProperty(value = "告警触发后是否发送", example = "true", accessMode = READ_WRITE, position = 8)
@ApiModelProperty(value = "告警阈值开关", example = "true", accessMode = READ_WRITE, position = 8)
private boolean enable = true;
@ApiModelProperty(value = "告警延迟时间,即延迟多久再发送告警,单位s", example = "300", accessMode = READ_WRITE, position = 9)
@Min(0)
private int delay;
@ApiModelProperty(value = "告警通知内容", example = "linux {monitor_name}: {monitor_id} cpu usage high",
accessMode = READ_WRITE, position = 10)
@Length(max = 1024)

View File

@@ -0,0 +1,29 @@
package com.usthe.alert.service;
import com.usthe.alert.pojo.entity.Alert;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
/**
* 告警信息管理接口
* @author tom
* @date 2021/12/9 10:06
*/
public interface AlertService {
/**
* 新增告警
* @param alert 告警实体
* @throws RuntimeException 新增过程异常抛出
*/
void addAlert(Alert alert) throws RuntimeException;
/**
* 动态条件查询
* @param specification 查询条件
* @param pageRequest 分页参数
* @return 查询结果
*/
Page<Alert> getAlerts(Specification<Alert> specification, PageRequest pageRequest);
}

View File

@@ -0,0 +1,32 @@
package com.usthe.alert.service.impl;
import com.usthe.alert.dao.AlertDao;
import com.usthe.alert.pojo.entity.Alert;
import com.usthe.alert.service.AlertService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
import org.springframework.stereotype.Service;
/**
* 告警信息服务实现
* @author tom
* @date 2021/12/10 15:39
*/
@Service
public class AlertServiceImpl implements AlertService {
@Autowired
private AlertDao alertDao;
@Override
public void addAlert(Alert alert) throws RuntimeException {
alertDao.save(alert);
}
@Override
public Page<Alert> getAlerts(Specification<Alert> specification, PageRequest pageRequest) {
return alertDao.findAll(specification, pageRequest);
}
}

View File

@@ -1,9 +1,10 @@
org.springframework.boot.autoconfigure.EnableAutoConfiguration=\
com.usthe.alert.service.impl.AlertDefineServiceImpl,\
com.usthe.alert.service.impl.AlertServiceImpl,\
com.usthe.alert.controller.AlertDefineController,\
com.usthe.alert.AlerterWorkerPool,\
com.usthe.alert.AlerterProperties,\
com.usthe.alert.AlerterDataQueue,\
com.usthe.alert.entrance.KafkaDataConsume,\
com.usthe.alert.AlerterConfiguration,\
com.usthe.alert.entrance.KafkaDataConsume,\
com.usthe.alert.calculate.CalculateAlarm

View File

@@ -87,4 +87,9 @@ public interface CommonConstants {
* 可用性对象
*/
String AVAILABLE = "available";
/**
* 可达性对象
*/
String REACHABLE = "reachable";
}

View File

@@ -0,0 +1,89 @@
package com.usthe.manager.component.alerter;
import com.usthe.alert.AlerterDataQueue;
import com.usthe.alert.AlerterWorkerPool;
import com.usthe.alert.pojo.entity.Alert;
import com.usthe.alert.service.AlertService;
import com.usthe.common.util.CommonConstants;
import com.usthe.manager.pojo.entity.Monitor;
import com.usthe.manager.service.MonitorService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
/**
* 告警信息入库分发
* @author tom
* @date 2021/12/10 12:58
*/
@Component
@Slf4j
public class DispatchAlarm {
private AlerterWorkerPool workerPool;
private AlerterDataQueue dataQueue;
private AlertService alertService;
private MonitorService monitorService;
public DispatchAlarm(AlerterWorkerPool workerPool, AlerterDataQueue dataQueue,
AlertService alertService, MonitorService monitorService) {
this.workerPool = workerPool;
this.dataQueue = dataQueue;
this.alertService = alertService;
this.monitorService = monitorService;
startDispatch();
}
private void startDispatch() {
Runnable runnable = () -> {
while (!Thread.currentThread().isInterrupted()) {
try {
Alert alert = dataQueue.pollAlertData();
if (alert != null) {
// 判断告警类型入库
storeAlertData(alert);
// 通知分发
sendAlertDataListener(alert);
}
} catch (InterruptedException e) {
log.error(e.getMessage());
}
}
};
workerPool.executeJob(runnable);
workerPool.executeJob(runnable);
workerPool.executeJob(runnable);
}
private void storeAlertData(Alert alert) {
// todo 过滤重复告警 使用 告警持续时间参数-duration 这个时间段的相同重复告警应该被过滤
// todo 使用缓存不直接操作库
Monitor monitor = monitorService.getMonitor(alert.getMonitorId());
if (monitor == null) {
log.warn("Dispatch alarm the monitorId: {} not existed, ignored.", alert.getMonitorId());
return;
}
alert.setMonitorName(monitor.getName());
if (monitor.getStatus() == CommonConstants.UN_MANAGE_CODE) {
// 当监控未管理时 忽略静默其告警信息
return;
}
if (monitor.getStatus() != CommonConstants.UN_AVAILABLE_CODE
&& monitor.getStatus() != CommonConstants.UN_REACHABLE_CODE) {
if (CommonConstants.AVAILABLE.equals(alert.getTarget())) {
// 可用性告警 需变更监控状态为不可用
monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_AVAILABLE_CODE);
} else if (CommonConstants.REACHABLE.equals(alert.getTarget())) {
// 可达性告警 需变更监控状态为不可达
monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_REACHABLE_CODE);
}
}
// 告警落库
alertService.addAlert(alert);
}
private void sendAlertDataListener(Alert alert) {
// todo 转发配置的邮件 微信 webhook
}
}

View File

@@ -66,7 +66,7 @@ public class MonitorController {
public ResponseEntity<Message<MonitorDto>> getMonitor(
@ApiParam(value = "监控ID", example = "6565463543") @PathVariable("id") long id) {
// 获取监控信息
MonitorDto monitorDto = monitorService.getMonitor(id);
MonitorDto monitorDto = monitorService.getMonitorDto(id);
Message.MessageBuilder<MonitorDto> messageBuilder = Message.builder();
if (monitorDto == null) {
messageBuilder.code(MONITOR_NOT_EXIST_CODE).msg("Monitor not exist.");

View File

@@ -5,6 +5,7 @@ import com.usthe.manager.pojo.entity.Monitor;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import java.util.List;
import java.util.Set;
@@ -36,4 +37,12 @@ public interface MonitorDao extends JpaRepository<Monitor, Long>, JpaSpecificati
*/
@Query("select new com.usthe.manager.pojo.dto.AppCount(mo.app, COUNT(mo.id)) from Monitor mo group by mo.app")
List<AppCount> findAppsCount();
/**
* 更新指定监控的状态
* @param id 监控ID
* @param status 监控状态
*/
@Query("update Monitor set status = :status where id = :id")
void updateMonitorStatus(@Param(value = "id") Long id, @Param(value = "status") byte status);
}

View File

@@ -73,7 +73,7 @@ public interface MonitorService {
* @return MonitorDto
* @throws RuntimeException 查询过程中异常抛出
*/
MonitorDto getMonitor(long id) throws RuntimeException;
MonitorDto getMonitorDto(long id) throws RuntimeException;
/**
* 动态条件查询
@@ -100,4 +100,18 @@ public interface MonitorService {
* @return 监控类别与监控数量映射
*/
List<AppCount> getAllAppMonitorsCount();
/**
* 查询监控
* @param monitorId 监控ID
* @return 监控信息
*/
Monitor getMonitor(Long monitorId);
/**
* 更新指定监控的状态
* @param monitorId 监控ID
* @param status 监控状态
*/
void updateMonitorStatus(Long monitorId, byte status);
}

View File

@@ -264,7 +264,7 @@ public class MonitorServiceImpl implements MonitorService {
@Override
@Transactional(readOnly = true)
public MonitorDto getMonitor(long id) throws RuntimeException {
public MonitorDto getMonitorDto(long id) throws RuntimeException {
Optional<Monitor> monitorOptional = monitorDao.findById(id);
if (monitorOptional.isPresent()) {
Monitor monitor = monitorOptional.get();
@@ -335,4 +335,14 @@ public class MonitorServiceImpl implements MonitorService {
return monitorDao.findAppsCount();
}
@Override
public Monitor getMonitor(Long monitorId) {
return monitorDao.findById(monitorId).orElse(null);
}
@Override
public void updateMonitorStatus(Long monitorId, byte status) {
monitorDao.updateMonitorStatus(monitorId, status);
}
}

View File

@@ -78,8 +78,7 @@ CREATE TABLE alert_define
expr varchar(255) not null comment '告警触发条件表达式',
priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色',
duration int not null comment '触发告警后持续时间,单位s',
enable boolean not null default true comment '告警触发后是否发送',
delay int not null comment '告警延迟时间,即延迟多久再发送告警,单位s',
enable boolean not null default true comment '告警阈值开关',
template varchar(255) not null comment '告警通知模板内容',
creator varchar(100) comment '创建者',
modifier varchar(100) comment '最新修改者',
@@ -104,5 +103,22 @@ CREATE TABLE alert_define_monitor_bind
index index_bind (alert_define_id, monitor_id)
) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4;
-- ----------------------------
-- Table structure for alert
-- ----------------------------
DROP TABLE IF EXISTS alert ;
CREATE TABLE alert
(
id bigint not null auto_increment comment '告警ID',
monitor_id bigint not null comment '告警监控对象ID',
monitor_name varchar(100) comment '告警监控对象名称',
priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色',
status tinyint not null default 0 comment '告警状态: 0-待发送 1-已发送 2-已过期(已经超过持续时间)',
target varchar(255) not null comment '告警目标对象: 监控可用性-available 指标-app.metrics.field',
duration int not null comment '触发告警后持续时间,单位s',
content varchar(255) not null comment '告警通知实际内容',
gmt_create timestamp default current_timestamp comment 'create time',
primary key (id)
) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4;
COMMIT;