[collector,alerter] bugfix: monitors always raise a timeout alert (#67)
This commit is contained in:
@@ -110,7 +110,7 @@ public class CalculateAlarm {
|
||||
} else {
|
||||
// 其他异常
|
||||
alertBuilder.target(CommonConstants.AVAILABLE)
|
||||
.content("监控紧急可用性告警: " + metricsData.getCode().name());
|
||||
.content("监控可用性告警: " + metricsData.getCode().name() + " : " + metricsData.getMsg());
|
||||
triggeredMonitorStateAlertMap.put(monitorId, metricsData.getCode());
|
||||
dataQueue.addAlertData(alertBuilder.build());
|
||||
}
|
||||
|
||||
@@ -105,9 +105,13 @@ public class CommonDispatcher implements MetricsTaskDispatch, CollectDataDispatc
|
||||
.setId(timerJob.getJob().getMonitorId())
|
||||
.setApp(timerJob.getJob().getApp())
|
||||
.setMetrics(metricsTime.getMetrics().getName())
|
||||
.setPriority(metricsTime.getMetrics().getPriority())
|
||||
.setTime(System.currentTimeMillis())
|
||||
.setCode(CollectRep.Code.TIMEOUT).setMsg("collect timeout").build();
|
||||
dispatchCollectData(metricsTime.timeout, metricsTime.getMetrics(), metricsData);
|
||||
log.error("[Collect Timeout]: \n{}", metricsData);
|
||||
if (metricsData.getPriority() == 0) {
|
||||
dispatchCollectData(metricsTime.timeout, metricsTime.getMetrics(), metricsData);
|
||||
}
|
||||
metricsTimeoutMonitorMap.remove(entry.getKey());
|
||||
}
|
||||
}
|
||||
@@ -165,8 +169,8 @@ public class CommonDispatcher implements MetricsTaskDispatch, CollectDataDispatc
|
||||
metricsSet.forEach(metricItem -> {
|
||||
MetricsCollect metricsCollect = new MetricsCollect(metricItem, timeout, this);
|
||||
jobRequestQueue.addJob(metricsCollect);
|
||||
metricsTimeoutMonitorMap.put(job.getId() + "-" + metrics.getName(),
|
||||
new MetricsTime(System.currentTimeMillis(), metrics, timeout));
|
||||
metricsTimeoutMonitorMap.put(job.getId() + "-" + metricItem.getName(),
|
||||
new MetricsTime(System.currentTimeMillis(), metricItem, timeout));
|
||||
});
|
||||
} else {
|
||||
// 当前执行级别的指标组列表未全执行完成,
|
||||
@@ -185,8 +189,8 @@ public class CommonDispatcher implements MetricsTaskDispatch, CollectDataDispatc
|
||||
metricsSet.forEach(metricItem -> {
|
||||
MetricsCollect metricsCollect = new MetricsCollect(metricItem, timeout, this);
|
||||
jobRequestQueue.addJob(metricsCollect);
|
||||
metricsTimeoutMonitorMap.put(job.getId() + "-" + metrics.getName(),
|
||||
new MetricsTime(System.currentTimeMillis(), metrics, timeout));
|
||||
metricsTimeoutMonitorMap.put(job.getId() + "-" + metricItem.getName(),
|
||||
new MetricsTime(System.currentTimeMillis(), metricItem, timeout));
|
||||
});
|
||||
} else {
|
||||
// 当前执行级别的指标组列表未全执行完成,
|
||||
|
||||
@@ -34,6 +34,10 @@ import java.util.stream.Collectors;
|
||||
@Slf4j
|
||||
@Data
|
||||
public class MetricsCollect implements Runnable, Comparable<MetricsCollect> {
|
||||
/**
|
||||
* 调度告警阈值时间 100ms
|
||||
*/
|
||||
private static final long WARN_DISPATCH_TIME = 100;
|
||||
/**
|
||||
* 监控ID
|
||||
*/
|
||||
@@ -267,11 +271,15 @@ public class MetricsCollect implements Runnable, Comparable<MetricsCollect> {
|
||||
/**
 * Finalizes a collection response: stamps it with the current time, logs
 * timing and outcome information, and builds the immutable result.
 *
 * <p>Only the time field of the builder is modified here; the response code
 * and message are read for logging but left as the caller set them.
 *
 * <p>NOTE(review): this span is taken from a rendered diff, so superseded and
 * replacement log statements appear next to each other (the plain
 * "[Collect Fail]" / "[Collect Success]" lines alongside their timed
 * variants) — confirm against the real file which ones are live.
 *
 * @param builder the response builder whose time is set and which is built
 * @return the built metrics data
 */
private CollectRep.MetricsData validateResponse(CollectRep.MetricsData.Builder builder) {
|
||||
long endTime = System.currentTimeMillis();
|
||||
builder.setTime(endTime);
|
||||
log.debug("[Collect]: newTime: {}, startTime: {}, spendTime: {}.", newTime, startTime, endTime - startTime);
|
||||
// runningTime: elapsed since startTime; allTime: elapsed since newTime.
// Presumably startTime is when execution began and newTime is when this task
// was created — TODO confirm against the field declarations.
long runningTime = endTime - startTime;
|
||||
long allTime = endTime - newTime;
|
||||
// Warn when the gap between newTime and startTime reaches the
// WARN_DISPATCH_TIME threshold.
if (startTime - newTime >= WARN_DISPATCH_TIME) {
|
||||
log.warn("[Collector Dispatch Warn, Dispatch Use {}ms.", startTime - newTime);
|
||||
}
|
||||
// Any code other than SUCCESS is logged as a failure together with its message.
if (builder.getCode() != CollectRep.Code.SUCCESS) {
|
||||
log.info("[Collect Fail] Reason: {}", builder.getMsg());
|
||||
log.info("[Collect Failed, Run {}ms, All {}ms] Reason: {}", runningTime, allTime, builder.getMsg());
|
||||
} else {
|
||||
log.info("[Collect Success].");
|
||||
log.info("[Collect Success, Run {}ms, All {}ms].", runningTime, allTime);
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user