|
|
@@ -36,13 +36,15 @@ public class CalculateAlarm {
|
|
|
// Queue that alerts built by this class are added to (via addAlertData).
private AlerterDataQueue dataQueue;
|
|
|
// Stored by the constructor; presumably used to look up alert definitions - usage is outside this view, confirm.
private AlertDefineService alertDefineService;
|
|
|
// Alerts keyed by a String; created as a ConcurrentHashMap in the constructor. Key format is not visible here.
private Map<String, Alert> triggeredAlertMap;
|
|
|
// monitorId -> the UN_REACHABLE / UN_CONNECTABLE code that last raised a monitor-state alert.
// The entry is removed when a priority-0 metrics collection succeeds, which triggers a recovery alert.
+ private Map<Long, CollectRep.Code> triggeredMonitorStateAlertMap;
|
|
|
|
|
|
- public CalculateAlarm (AlerterProperties properties, AlerterWorkerPool workerPool,
|
|
|
- AlerterDataQueue dataQueue, AlertDefineService alertDefineService) {
|
|
|
/**
 * Creates the alarm calculator: stores its collaborators, creates the two
 * alert-tracking maps, and then calls {@code startCalculate()}.
 *
 * @param workerPool         worker pool kept for later use (its use is outside this view)
 * @param dataQueue          queue that generated alerts are added to
 * @param alertDefineService service kept for later use; presumably supplies alert
 *                           definitions - confirm against the rest of the class
 */
+ public CalculateAlarm (AlerterWorkerPool workerPool, AlerterDataQueue dataQueue,
|
|
|
+ AlertDefineService alertDefineService) {
|
|
|
this.workerPool = workerPool;
|
|
|
this.dataQueue = dataQueue;
|
|
|
this.alertDefineService = alertDefineService;
|
|
|
// Concurrent map with an initial capacity of 128.
this.triggeredAlertMap = new ConcurrentHashMap<>(128);
|
|
|
// Tracks, per monitor id, the outstanding unreachable / unconnectable state so a recovery alert can be sent later.
+ this.triggeredMonitorStateAlertMap = new ConcurrentHashMap<>(128);
|
|
|
// NOTE(review): called from the constructor; if startCalculate() hands `this` to other threads,
// confirm every field it needs is assigned above (it is the last statement here).
startCalculate();
|
|
|
}
|
|
|
|
|
|
@@ -68,29 +70,42 @@ public class CalculateAlarm {
|
|
|
long monitorId = metricsData.getId();
|
|
|
String app = metricsData.getApp();
|
|
|
String metrics = metricsData.getMetrics();
|
|
|
- // 先判断采集响应数据状态 UN_REACHABLE/UN_CONNECTABLE 则需发最高级别告警
|
|
|
- if (metricsData.getCode() != CollectRep.Code.SUCCESS) {
|
|
|
- // 采集异常
|
|
|
- Alert.AlertBuilder alertBuilder = Alert.builder()
|
|
|
- .monitorId(monitorId)
|
|
|
- .priority((byte) 0)
|
|
|
- .status((byte) 0)
|
|
|
- .times(1);
|
|
|
- if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE) {
|
|
|
- // UN_REACHABLE 对端不可达(网络层icmp)
|
|
|
- alertBuilder.target(CommonConstants.REACHABLE)
|
|
|
- .content("监控紧急可达性告警: " + metricsData.getCode().name());
|
|
|
- dataQueue.addAlertData(alertBuilder.build());
|
|
|
- } else if (metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) {
|
|
|
- // UN_CONNECTABLE 对端连接失败(传输层tcp,udp)
|
|
|
- alertBuilder.target(CommonConstants.AVAILABLE)
|
|
|
- .content("监控紧急可用性告警: " + metricsData.getCode().name());
|
|
|
- dataQueue.addAlertData(alertBuilder.build());
|
|
|
- } else {
|
|
|
- // todo 其它规范异常 TIMEOUT ...
|
|
|
+ // 先判断调度优先级为0的指标组采集响应数据状态 UN_REACHABLE/UN_CONNECTABLE 则需发最高级别告警进行监控状态变更
|
|
|
+ if (metricsData.getPriority() == 0) {
|
|
|
+ if (metricsData.getCode() != CollectRep.Code.SUCCESS) {
|
|
|
+ // 采集异常
|
|
|
+ Alert.AlertBuilder alertBuilder = Alert.builder()
|
|
|
+ .monitorId(monitorId)
|
|
|
+ .priority((byte) 0)
|
|
|
+ .status((byte) 0)
|
|
|
+ .times(1);
|
|
|
+ if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE) {
|
|
|
+ // UN_REACHABLE 对端不可达(网络层icmp)
|
|
|
+ alertBuilder.target(CommonConstants.REACHABLE)
|
|
|
+ .content("监控紧急可达性告警: " + metricsData.getCode().name());
|
|
|
+ triggeredMonitorStateAlertMap.put(monitorId, CollectRep.Code.UN_REACHABLE);
|
|
|
+ dataQueue.addAlertData(alertBuilder.build());
|
|
|
+ } else if (metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) {
|
|
|
+ // UN_CONNECTABLE 对端连接失败(传输层tcp,udp)
|
|
|
+ alertBuilder.target(CommonConstants.AVAILABLE)
|
|
|
+ .content("监控紧急可用性告警: " + metricsData.getCode().name());
|
|
|
+ triggeredMonitorStateAlertMap.put(monitorId, CollectRep.Code.UN_CONNECTABLE);
|
|
|
+ dataQueue.addAlertData(alertBuilder.build());
|
|
|
+ } else {
|
|
|
+ // todo 其它规范异常 TIMEOUT ...
|
|
|
+ return;
|
|
|
+ }
|
|
|
return;
|
|
|
+ } else {
|
|
|
+ // 判断关联监控之前是否有可用性或者不可达告警,发送恢复告警进行监控状态恢复
|
|
|
+ CollectRep.Code stateCode = triggeredMonitorStateAlertMap.remove(monitorId);
|
|
|
+ if (stateCode != null) {
|
|
|
+ // 发送告警恢复
|
|
|
+ Alert resumeAlert = Alert.builder()
|
|
|
+ .monitorId(monitorId).status((byte) 2).build();
|
|
|
+ dataQueue.addAlertData(resumeAlert);
|
|
|
+ }
|
|
|
}
|
|
|
- return;
|
|
|
}
|
|
|
// 查出此监控类型下的此指标集合下关联配置的告警定义信息
|
|
|
// field - define[]
|