[collector,alerter]bugfix:monitors always timeout alert (#67)
This commit is contained in:
@@ -110,7 +110,7 @@ public class CalculateAlarm {
|
|||||||
} else {
|
} else {
|
||||||
// 其他异常
|
// 其他异常
|
||||||
alertBuilder.target(CommonConstants.AVAILABLE)
|
alertBuilder.target(CommonConstants.AVAILABLE)
|
||||||
.content("监控紧急可用性告警: " + metricsData.getCode().name());
|
.content("监控可用性告警: " + metricsData.getCode().name() + " : " + metricsData.getMsg());
|
||||||
triggeredMonitorStateAlertMap.put(monitorId, metricsData.getCode());
|
triggeredMonitorStateAlertMap.put(monitorId, metricsData.getCode());
|
||||||
dataQueue.addAlertData(alertBuilder.build());
|
dataQueue.addAlertData(alertBuilder.build());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,9 +105,13 @@ public class CommonDispatcher implements MetricsTaskDispatch, CollectDataDispatc
|
|||||||
.setId(timerJob.getJob().getMonitorId())
|
.setId(timerJob.getJob().getMonitorId())
|
||||||
.setApp(timerJob.getJob().getApp())
|
.setApp(timerJob.getJob().getApp())
|
||||||
.setMetrics(metricsTime.getMetrics().getName())
|
.setMetrics(metricsTime.getMetrics().getName())
|
||||||
|
.setPriority(metricsTime.getMetrics().getPriority())
|
||||||
.setTime(System.currentTimeMillis())
|
.setTime(System.currentTimeMillis())
|
||||||
.setCode(CollectRep.Code.TIMEOUT).setMsg("collect timeout").build();
|
.setCode(CollectRep.Code.TIMEOUT).setMsg("collect timeout").build();
|
||||||
|
log.error("[Collect Timeout]: \n{}", metricsData);
|
||||||
|
if (metricsData.getPriority() == 0) {
|
||||||
dispatchCollectData(metricsTime.timeout, metricsTime.getMetrics(), metricsData);
|
dispatchCollectData(metricsTime.timeout, metricsTime.getMetrics(), metricsData);
|
||||||
|
}
|
||||||
metricsTimeoutMonitorMap.remove(entry.getKey());
|
metricsTimeoutMonitorMap.remove(entry.getKey());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -165,8 +169,8 @@ public class CommonDispatcher implements MetricsTaskDispatch, CollectDataDispatc
|
|||||||
metricsSet.forEach(metricItem -> {
|
metricsSet.forEach(metricItem -> {
|
||||||
MetricsCollect metricsCollect = new MetricsCollect(metricItem, timeout, this);
|
MetricsCollect metricsCollect = new MetricsCollect(metricItem, timeout, this);
|
||||||
jobRequestQueue.addJob(metricsCollect);
|
jobRequestQueue.addJob(metricsCollect);
|
||||||
metricsTimeoutMonitorMap.put(job.getId() + "-" + metrics.getName(),
|
metricsTimeoutMonitorMap.put(job.getId() + "-" + metricItem.getName(),
|
||||||
new MetricsTime(System.currentTimeMillis(), metrics, timeout));
|
new MetricsTime(System.currentTimeMillis(), metricItem, timeout));
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// 当前执行级别的指标组列表未全执行完成,
|
// 当前执行级别的指标组列表未全执行完成,
|
||||||
@@ -185,8 +189,8 @@ public class CommonDispatcher implements MetricsTaskDispatch, CollectDataDispatc
|
|||||||
metricsSet.forEach(metricItem -> {
|
metricsSet.forEach(metricItem -> {
|
||||||
MetricsCollect metricsCollect = new MetricsCollect(metricItem, timeout, this);
|
MetricsCollect metricsCollect = new MetricsCollect(metricItem, timeout, this);
|
||||||
jobRequestQueue.addJob(metricsCollect);
|
jobRequestQueue.addJob(metricsCollect);
|
||||||
metricsTimeoutMonitorMap.put(job.getId() + "-" + metrics.getName(),
|
metricsTimeoutMonitorMap.put(job.getId() + "-" + metricItem.getName(),
|
||||||
new MetricsTime(System.currentTimeMillis(), metrics, timeout));
|
new MetricsTime(System.currentTimeMillis(), metricItem, timeout));
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// 当前执行级别的指标组列表未全执行完成,
|
// 当前执行级别的指标组列表未全执行完成,
|
||||||
|
|||||||
@@ -34,6 +34,10 @@ import java.util.stream.Collectors;
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
@Data
|
@Data
|
||||||
public class MetricsCollect implements Runnable, Comparable<MetricsCollect> {
|
public class MetricsCollect implements Runnable, Comparable<MetricsCollect> {
|
||||||
|
/**
|
||||||
|
* 调度告警阈值时间 100ms
|
||||||
|
*/
|
||||||
|
private static final long WARN_DISPATCH_TIME = 100;
|
||||||
/**
|
/**
|
||||||
* 监控ID
|
* 监控ID
|
||||||
*/
|
*/
|
||||||
@@ -267,11 +271,15 @@ public class MetricsCollect implements Runnable, Comparable<MetricsCollect> {
|
|||||||
private CollectRep.MetricsData validateResponse(CollectRep.MetricsData.Builder builder) {
|
private CollectRep.MetricsData validateResponse(CollectRep.MetricsData.Builder builder) {
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
builder.setTime(endTime);
|
builder.setTime(endTime);
|
||||||
log.debug("[Collect]: newTime: {}, startTime: {}, spendTime: {}.", newTime, startTime, endTime - startTime);
|
long runningTime = endTime - startTime;
|
||||||
|
long allTime = endTime - newTime;
|
||||||
|
if (startTime - newTime >= WARN_DISPATCH_TIME) {
|
||||||
|
log.warn("[Collector Dispatch Warn, Dispatch Use {}ms.", startTime - newTime);
|
||||||
|
}
|
||||||
if (builder.getCode() != CollectRep.Code.SUCCESS) {
|
if (builder.getCode() != CollectRep.Code.SUCCESS) {
|
||||||
log.info("[Collect Fail] Reason: {}", builder.getMsg());
|
log.info("[Collect Failed, Run {}ms, All {}ms] Reason: {}", runningTime, allTime, builder.getMsg());
|
||||||
} else {
|
} else {
|
||||||
log.info("[Collect Success].");
|
log.info("[Collect Success, Run {}ms, All {}ms].", runningTime, allTime);
|
||||||
}
|
}
|
||||||
return builder.build();
|
return builder.build();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user