From ab2d4511ec9a1dacbfca432bfa68c094c1418864 Mon Sep 17 00:00:00 2001 From: tomsun28 Date: Fri, 10 Dec 2021 16:56:39 +0800 Subject: [PATCH] =?UTF-8?q?[alerter,manager]=20=E5=91=8A=E8=AD=A6=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E5=85=A5=E5=BA=93=EF=BC=8C=E7=9B=91=E6=8E=A7=E7=8A=B6?= =?UTF-8?q?=E6=80=81=E5=8F=98=E6=9B=B4=E8=81=94=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../usthe/alert/calculate/CalculateAlarm.java | 32 ++++--- .../java/com/usthe/alert/dao/AlertDao.java | 14 +++ .../com/usthe/alert/dao/AlertDefineDao.java | 2 +- .../com/usthe/alert/pojo/entity/Alert.java | 3 +- .../usthe/alert/pojo/entity/AlertDefine.java | 6 +- .../com/usthe/alert/service/AlertService.java | 29 ++++++ .../alert/service/impl/AlertServiceImpl.java | 32 +++++++ .../main/resources/META-INF/spring.factories | 3 +- .../usthe/common/util/CommonConstants.java | 5 ++ .../component/alerter/DispatchAlarm.java | 89 +++++++++++++++++++ .../manager/controller/MonitorController.java | 2 +- .../com/usthe/manager/dao/MonitorDao.java | 9 ++ .../usthe/manager/service/MonitorService.java | 16 +++- .../service/impl/MonitorServiceImpl.java | 12 ++- manager/src/main/resources/db/schema.sql | 20 ++++- 15 files changed, 247 insertions(+), 27 deletions(-) create mode 100644 alerter/src/main/java/com/usthe/alert/dao/AlertDao.java create mode 100644 alerter/src/main/java/com/usthe/alert/service/AlertService.java create mode 100644 alerter/src/main/java/com/usthe/alert/service/impl/AlertServiceImpl.java create mode 100644 manager/src/main/java/com/usthe/manager/component/alerter/DispatchAlarm.java diff --git a/alerter/src/main/java/com/usthe/alert/calculate/CalculateAlarm.java b/alerter/src/main/java/com/usthe/alert/calculate/CalculateAlarm.java index b81d7ea..6e39d07 100644 --- a/alerter/src/main/java/com/usthe/alert/calculate/CalculateAlarm.java +++ b/alerter/src/main/java/com/usthe/alert/calculate/CalculateAlarm.java @@ -68,18 +68,24 @@ public class CalculateAlarm { // 先判断采集响应数据状态 UN_REACHABLE/UN_CONNECTABLE 则需发最高级别告警 if (metricsData.getCode() != CollectRep.Code.SUCCESS) { // 采集异常 - if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE - || metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) { - // 连接型可用性异常 UN_REACHABLE 对端不可达(网络层icmp) UN_CONNECTABLE 对端连接失败(传输层tcp,udp) - Alert alert = Alert.builder() - .monitorId(monitorId) - .priority((byte) 0) - .status((byte) 0) - .target(CommonConstants.AVAILABLE) - .duration(300) - .content("监控紧急可用性告警: " + metricsData.getCode().name()) - .build(); - dataQueue.addAlertData(alert); + Alert.AlertBuilder alertBuilder = Alert.builder() + .monitorId(monitorId) + .priority((byte) 0) + .status((byte) 0) + .duration(300); + if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE) { + // UN_REACHABLE 对端不可达(网络层icmp) + alertBuilder.target(CommonConstants.REACHABLE) + .content("监控紧急可达性告警: " + metricsData.getCode().name()); + dataQueue.addAlertData(alertBuilder.build()); + } else if (metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) { + // UN_CONNECTABLE 对端连接失败(传输层tcp,udp) + alertBuilder.target(CommonConstants.AVAILABLE) + .content("监控紧急可用性告警: " + metricsData.getCode().name()); + dataQueue.addAlertData(alertBuilder.build()); + } else { + // todo 其它规范异常 TIMEOUT ... + return; } return; } @@ -127,7 +133,7 @@ public class CalculateAlarm { Expression expression = AviatorEvaluator.compile(expr, true); Boolean match = (Boolean) expression.execute(fieldValueMap); if (match) { - // 阈值规则匹配,触发告警 todo 告警延迟delay参数实现 + // 阈值规则匹配,触发告警 Alert alert = Alert.builder() .monitorId(monitorId) .priority(define.getPriority()) diff --git a/alerter/src/main/java/com/usthe/alert/dao/AlertDao.java b/alerter/src/main/java/com/usthe/alert/dao/AlertDao.java new file mode 100644 index 0000000..3a80e99 --- /dev/null +++ b/alerter/src/main/java/com/usthe/alert/dao/AlertDao.java @@ -0,0 +1,14 @@ +package com.usthe.alert.dao; + +import com.usthe.alert.pojo.entity.Alert; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.JpaSpecificationExecutor; + +/** + * Alert 数据库操作 + * @author tom + * @date 2021/12/9 10:03 + */ +public interface AlertDao extends JpaRepository, JpaSpecificationExecutor { + +} diff --git a/alerter/src/main/java/com/usthe/alert/dao/AlertDefineDao.java b/alerter/src/main/java/com/usthe/alert/dao/AlertDefineDao.java index 8f0adf4..e755bb6 100644 --- a/alerter/src/main/java/com/usthe/alert/dao/AlertDefineDao.java +++ b/alerter/src/main/java/com/usthe/alert/dao/AlertDefineDao.java @@ -29,7 +29,7 @@ public interface AlertDefineDao extends JpaRepository, JpaSpe * @return 告警定义列表 */ @Query("select define from AlertDefine define join AlertDefineBind bind on bind.alertDefineId = define.id " + - "where bind.monitorId = :monitorId and define.metric = :metrics") + "where bind.monitorId = :monitorId and define.metric = :metrics and define.enable = true") List queryAlertDefinesByMonitor(@Param(value = "monitorId") Long monitorId, @Param(value = "metrics") String metrics); } diff --git a/alerter/src/main/java/com/usthe/alert/pojo/entity/Alert.java b/alerter/src/main/java/com/usthe/alert/pojo/entity/Alert.java index 999dc1a..77c8ca1 100644 --- a/alerter/src/main/java/com/usthe/alert/pojo/entity/Alert.java +++ b/alerter/src/main/java/com/usthe/alert/pojo/entity/Alert.java @@ -61,8 +61,7 @@ public class Alert { @ApiModelProperty(value = "告警目标对象: 监控可用性-available 指标-app.metrics.field", example = "1", accessMode = READ_WRITE, position = 4) - @Min(0) - @Max(2) + @Length(max = 255) private String target; @ApiModelProperty(value = "触发告警后持续时间,单位s", example = "60", accessMode = READ_WRITE, position = 7) diff --git a/alerter/src/main/java/com/usthe/alert/pojo/entity/AlertDefine.java b/alerter/src/main/java/com/usthe/alert/pojo/entity/AlertDefine.java index c2003ad..ccc2c28 100644 --- a/alerter/src/main/java/com/usthe/alert/pojo/entity/AlertDefine.java +++ b/alerter/src/main/java/com/usthe/alert/pojo/entity/AlertDefine.java @@ -70,13 +70,9 @@ public class AlertDefine { @Min(0) private int duration; - @ApiModelProperty(value = "告警触发后是否发送", example = "true", accessMode = READ_WRITE, position = 8) + @ApiModelProperty(value = "告警阈值开关", example = "true", accessMode = READ_WRITE, position = 8) private boolean enable = true; - @ApiModelProperty(value = "告警延迟时间,即延迟多久再发送告警,单位s", example = "300", accessMode = READ_WRITE, position = 9) - @Min(0) - private int delay; - @ApiModelProperty(value = "告警通知内容", example = "linux {monitor_name}: {monitor_id} cpu usage high", accessMode = READ_WRITE, position = 10) @Length(max = 1024) diff --git a/alerter/src/main/java/com/usthe/alert/service/AlertService.java b/alerter/src/main/java/com/usthe/alert/service/AlertService.java new file mode 100644 index 0000000..23a1040 --- /dev/null +++ b/alerter/src/main/java/com/usthe/alert/service/AlertService.java @@ -0,0 +1,29 @@ +package com.usthe.alert.service; + +import com.usthe.alert.pojo.entity.Alert; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.jpa.domain.Specification; + +/** + * 告警信息管理接口 + * @author tom + * @date 2021/12/9 10:06 + */ +public interface AlertService { + + /** + * 新增告警 + * @param alert 告警实体 + * @throws RuntimeException 新增过程异常抛出 + */ + void addAlert(Alert alert) throws RuntimeException; + + /** + * 动态条件查询 + * @param specification 查询条件 + * @param pageRequest 分页参数 + * @return 查询结果 + */ + Page getAlerts(Specification specification, PageRequest pageRequest); +} diff --git a/alerter/src/main/java/com/usthe/alert/service/impl/AlertServiceImpl.java b/alerter/src/main/java/com/usthe/alert/service/impl/AlertServiceImpl.java new file mode 100644 index 0000000..472ca57 --- /dev/null +++ b/alerter/src/main/java/com/usthe/alert/service/impl/AlertServiceImpl.java @@ -0,0 +1,32 @@ +package com.usthe.alert.service.impl; + +import com.usthe.alert.dao.AlertDao; +import com.usthe.alert.pojo.entity.Alert; +import com.usthe.alert.service.AlertService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.jpa.domain.Specification; +import org.springframework.stereotype.Service; + +/** + * 告警信息服务实现 + * @author tom + * @date 2021/12/10 15:39 + */ +@Service +public class AlertServiceImpl implements AlertService { + + @Autowired + private AlertDao alertDao; + + @Override + public void addAlert(Alert alert) throws RuntimeException { + alertDao.save(alert); + } + + @Override + public Page getAlerts(Specification specification, PageRequest pageRequest) { + return alertDao.findAll(specification, pageRequest); + } +} diff --git a/alerter/src/main/resources/META-INF/spring.factories b/alerter/src/main/resources/META-INF/spring.factories index c06e47e..83be499 100644 --- a/alerter/src/main/resources/META-INF/spring.factories +++ b/alerter/src/main/resources/META-INF/spring.factories @@ -1,9 +1,10 @@ org.springframework.boot.autoconfigure.EnableAutoConfiguration=\ com.usthe.alert.service.impl.AlertDefineServiceImpl,\ +com.usthe.alert.service.impl.AlertServiceImpl,\ com.usthe.alert.controller.AlertDefineController,\ com.usthe.alert.AlerterWorkerPool,\ com.usthe.alert.AlerterProperties,\ com.usthe.alert.AlerterDataQueue,\ -com.usthe.alert.entrance.KafkaDataConsume,\ com.usthe.alert.AlerterConfiguration,\ +com.usthe.alert.entrance.KafkaDataConsume,\ com.usthe.alert.calculate.CalculateAlarm \ No newline at end of file diff --git a/common/src/main/java/com/usthe/common/util/CommonConstants.java b/common/src/main/java/com/usthe/common/util/CommonConstants.java index 6356c35..0ee2964 100644 --- a/common/src/main/java/com/usthe/common/util/CommonConstants.java +++ b/common/src/main/java/com/usthe/common/util/CommonConstants.java @@ -87,4 +87,9 @@ public interface CommonConstants { * 可用性对象 */ String AVAILABLE = "available"; + + /** + * 可达性对象 + */ + String REACHABLE = "reachable"; } diff --git a/manager/src/main/java/com/usthe/manager/component/alerter/DispatchAlarm.java b/manager/src/main/java/com/usthe/manager/component/alerter/DispatchAlarm.java new file mode 100644 index 0000000..cb27d40 --- /dev/null +++ b/manager/src/main/java/com/usthe/manager/component/alerter/DispatchAlarm.java @@ -0,0 +1,89 @@ +package com.usthe.manager.component.alerter; + +import com.usthe.alert.AlerterDataQueue; +import com.usthe.alert.AlerterWorkerPool; +import com.usthe.alert.pojo.entity.Alert; +import com.usthe.alert.service.AlertService; +import com.usthe.common.util.CommonConstants; +import com.usthe.manager.pojo.entity.Monitor; +import com.usthe.manager.service.MonitorService; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; + +/** + * 告警信息入库分发 + * @author tom + * @date 2021/12/10 12:58 + */ +@Component +@Slf4j +public class DispatchAlarm { + + private AlerterWorkerPool workerPool; + private AlerterDataQueue dataQueue; + private AlertService alertService; + private MonitorService monitorService; + + public DispatchAlarm(AlerterWorkerPool workerPool, AlerterDataQueue dataQueue, + AlertService alertService, MonitorService monitorService) { + this.workerPool = workerPool; + this.dataQueue = dataQueue; + this.alertService = alertService; + this.monitorService = monitorService; + startDispatch(); + } + + private void startDispatch() { + Runnable runnable = () -> { + while (!Thread.currentThread().isInterrupted()) { + try { + Alert alert = dataQueue.pollAlertData(); + if (alert != null) { + // 判断告警类型入库 + storeAlertData(alert); + // 通知分发 + sendAlertDataListener(alert); + } + } catch (InterruptedException e) { + log.error(e.getMessage()); + } + } + }; + workerPool.executeJob(runnable); + workerPool.executeJob(runnable); + workerPool.executeJob(runnable); + } + + private void storeAlertData(Alert alert) { + // todo 过滤重复告警 使用 告警持续时间参数-duration 这个时间段的相同重复告警应该被过滤 + // todo 使用缓存不直接操作库 + Monitor monitor = monitorService.getMonitor(alert.getMonitorId()); + if (monitor == null) { + log.warn("Dispatch alarm the monitorId: {} not existed, ignored.", alert.getMonitorId()); + return; + } + alert.setMonitorName(monitor.getName()); + if (monitor.getStatus() == CommonConstants.UN_MANAGE_CODE) { + // 当监控未管理时 忽略静默其告警信息 + return; + } + if (monitor.getStatus() != CommonConstants.UN_AVAILABLE_CODE + && monitor.getStatus() != CommonConstants.UN_REACHABLE_CODE) { + if (CommonConstants.AVAILABLE.equals(alert.getTarget())) { + // 可用性告警 需变更监控状态为不可用 + monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_AVAILABLE_CODE); + } else if (CommonConstants.REACHABLE.equals(alert.getTarget())) { + // 可达性告警 需变更监控状态为不可达 + monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_REACHABLE_CODE); + } + } + // 告警落库 + alertService.addAlert(alert); + } + + private void sendAlertDataListener(Alert alert) { + // todo 转发配置的邮件 微信 webhook + } + + +} diff --git a/manager/src/main/java/com/usthe/manager/controller/MonitorController.java b/manager/src/main/java/com/usthe/manager/controller/MonitorController.java index 9b922e7..d6d3849 100644 --- a/manager/src/main/java/com/usthe/manager/controller/MonitorController.java +++ b/manager/src/main/java/com/usthe/manager/controller/MonitorController.java @@ -66,7 +66,7 @@ public class MonitorController { public ResponseEntity> getMonitor( @ApiParam(value = "监控ID", example = "6565463543") @PathVariable("id") long id) { // 获取监控信息 - MonitorDto monitorDto = monitorService.getMonitor(id); + MonitorDto monitorDto = monitorService.getMonitorDto(id); Message.MessageBuilder messageBuilder = Message.builder(); if (monitorDto == null) { messageBuilder.code(MONITOR_NOT_EXIST_CODE).msg("Monitor not exist."); diff --git a/manager/src/main/java/com/usthe/manager/dao/MonitorDao.java b/manager/src/main/java/com/usthe/manager/dao/MonitorDao.java index d435706..da42527 100644 --- a/manager/src/main/java/com/usthe/manager/dao/MonitorDao.java +++ b/manager/src/main/java/com/usthe/manager/dao/MonitorDao.java @@ -5,6 +5,7 @@ import com.usthe.manager.pojo.entity.Monitor; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.JpaSpecificationExecutor; import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import java.util.List; import java.util.Set; @@ -36,4 +37,12 @@ public interface MonitorDao extends JpaRepository, JpaSpecificati */ @Query("select new com.usthe.manager.pojo.dto.AppCount(mo.app, COUNT(mo.id)) from Monitor mo group by mo.app") List findAppsCount(); + + /** + * 更新指定监控的状态 + * @param id 监控ID + * @param status 监控状态 + */ + @Query("update Monitor set status = :status where id = :id") + void updateMonitorStatus(@Param(value = "id") Long id, @Param(value = "status") byte status); } diff --git a/manager/src/main/java/com/usthe/manager/service/MonitorService.java b/manager/src/main/java/com/usthe/manager/service/MonitorService.java index 65eb379..15cc1a6 100644 --- a/manager/src/main/java/com/usthe/manager/service/MonitorService.java +++ b/manager/src/main/java/com/usthe/manager/service/MonitorService.java @@ -73,7 +73,7 @@ public interface MonitorService { * @return MonitorDto * @throws RuntimeException 查询过程中异常抛出 */ - MonitorDto getMonitor(long id) throws RuntimeException; + MonitorDto getMonitorDto(long id) throws RuntimeException; /** * 动态条件查询 @@ -100,4 +100,18 @@ public interface MonitorService { * @return 监控类别与监控数量映射 */ List getAllAppMonitorsCount(); + + /** + * 查询监控 + * @param monitorId 监控ID + * @return 监控信息 + */ + Monitor getMonitor(Long monitorId); + + /** + * 更新指定监控的状态 + * @param monitorId 监控ID + * @param status 监控状态 + */ + void updateMonitorStatus(Long monitorId, byte status); } diff --git a/manager/src/main/java/com/usthe/manager/service/impl/MonitorServiceImpl.java b/manager/src/main/java/com/usthe/manager/service/impl/MonitorServiceImpl.java index 487b956..c3c985c 100644 --- a/manager/src/main/java/com/usthe/manager/service/impl/MonitorServiceImpl.java +++ b/manager/src/main/java/com/usthe/manager/service/impl/MonitorServiceImpl.java @@ -264,7 +264,7 @@ public class MonitorServiceImpl implements MonitorService { @Override @Transactional(readOnly = true) - public MonitorDto getMonitor(long id) throws RuntimeException { + public MonitorDto getMonitorDto(long id) throws RuntimeException { Optional monitorOptional = monitorDao.findById(id); if (monitorOptional.isPresent()) { Monitor monitor = monitorOptional.get(); @@ -335,4 +335,14 @@ public class MonitorServiceImpl implements MonitorService { return monitorDao.findAppsCount(); } + + @Override + public Monitor getMonitor(Long monitorId) { + return monitorDao.findById(monitorId).orElse(null); + } + + @Override + public void updateMonitorStatus(Long monitorId, byte status) { + monitorDao.updateMonitorStatus(monitorId, status); + } } diff --git a/manager/src/main/resources/db/schema.sql b/manager/src/main/resources/db/schema.sql index 86e11fc..2399460 100644 --- a/manager/src/main/resources/db/schema.sql +++ b/manager/src/main/resources/db/schema.sql @@ -78,8 +78,7 @@ CREATE TABLE alert_define expr varchar(255) not null comment '告警触发条件表达式', priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色', duration int not null comment '触发告警后持续时间,单位s', - enable boolean not null default true comment '告警触发后是否发送', - delay int not null comment '告警延迟时间,即延迟多久再发送告警,单位s', + enable boolean not null default true comment '告警阈值开关', template varchar(255) not null comment '告警通知模板内容', creator varchar(100) comment '创建者', modifier varchar(100) comment '最新修改者', @@ -104,5 +103,22 @@ CREATE TABLE alert_define_monitor_bind index index_bind (alert_define_id, monitor_id) ) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; +-- ---------------------------- +-- Table structure for alert +-- ---------------------------- +DROP TABLE IF EXISTS alert ; +CREATE TABLE alert +( + id bigint not null auto_increment comment '告警ID', + monitor_id bigint not null comment '告警监控对象ID', + monitor_name varchar(100) comment '告警监控对象名称', + priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色', + status tinyint not null default 0 comment '告警状态: 0-待发送 1-已发送 2-已过期(已经超过持续时间)', + target varchar(255) not null comment '告警目标对象: 监控可用性-available 指标-app.metrics.field', + duration int not null comment '触发告警后持续时间,单位s', + content varchar(255) not null comment '告警通知实际内容', + gmt_create timestamp default current_timestamp comment 'create time', + primary key (id) +) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; COMMIT; \ No newline at end of file