实时监控看板: 流程实例状态、任务积压、吞吐量、效率瓶颈监控
2025/9/6大约 13 分钟
在企业级BPM平台中,实时监控看板是确保业务流程稳定运行和及时发现问题的重要工具。通过直观的可视化界面,管理人员可以实时了解流程执行状态、任务处理情况、系统性能指标等关键信息。本章将深入探讨实时监控看板的设计与实现,帮助读者构建一个功能完善、响应迅速的监控系统。
实时监控的核心价值
状态透明化
实时监控看板将复杂的系统状态以直观的方式呈现,让管理人员能够一目了然地了解整个BPM平台的运行状况。
问题预警
通过设置合理的阈值和预警机制,监控看板能够在问题发生之前或发生初期及时发现异常,为问题处理争取宝贵时间。
决策支持
丰富的监控数据为管理决策提供了有力支撑,帮助管理者基于数据做出科学合理的决策。
监控看板架构设计
一个高效的实时监控看板需要具备良好的架构设计,以确保数据的实时性、准确性和可扩展性。
// 监控看板服务
@Service
public class MonitoringDashboardService {
@Autowired
private ProcessInstanceRepository processInstanceRepository;
@Autowired
private TaskRepository taskRepository;
@Autowired
private PerformanceMetricsRepository performanceMetricsRepository;
@Autowired
private AlertService alertService;
@Autowired
private RedisTemplate<String, Object> redisTemplate;
/**
* 获取实时监控数据
* @param dashboardRequest 监控请求
* @return 监控数据
*/
public RealTimeMonitoringData getRealTimeMonitoringData(DashboardRequest dashboardRequest) {
RealTimeMonitoringData monitoringData = new RealTimeMonitoringData();
monitoringData.setTimestamp(new Date());
try {
// 从缓存获取数据(如果存在且未过期)
String cacheKey = "monitoring_data:" + dashboardRequest.getDashboardId();
RealTimeMonitoringData cachedData = (RealTimeMonitoringData)
redisTemplate.opsForValue().get(cacheKey);
if (cachedData != null &&
(System.currentTimeMillis() - cachedData.getTimestamp().getTime()) < 30000) {
// 使用缓存数据(30秒内有效)
return cachedData;
}
// 获取流程实例状态统计
ProcessInstanceStats processStats = getProcessInstanceStats(dashboardRequest);
monitoringData.setProcessInstanceStats(processStats);
// 获取任务积压统计
TaskBacklogStats taskBacklogStats = getTaskBacklogStats(dashboardRequest);
monitoringData.setTaskBacklogStats(taskBacklogStats);
// 获取吞吐量统计
ThroughputStats throughputStats = getThroughputStats(dashboardRequest);
monitoringData.setThroughputStats(throughputStats);
// 获取效率瓶颈分析
BottleneckAnalysis bottleneckAnalysis = getBottleneckAnalysis(dashboardRequest);
monitoringData.setBottleneckAnalysis(bottleneckAnalysis);
// 获取系统健康状态
SystemHealthStatus healthStatus = getSystemHealthStatus();
monitoringData.setSystemHealthStatus(healthStatus);
// 缓存数据(5秒)
redisTemplate.opsForValue().set(cacheKey, monitoringData, 5, TimeUnit.SECONDS);
} catch (Exception e) {
log.error("获取实时监控数据失败", e);
monitoringData.setError("获取监控数据失败: " + e.getMessage());
}
return monitoringData;
}
/**
* 获取流程实例状态统计
* @param request 请求参数
* @return 流程实例统计
*/
private ProcessInstanceStats getProcessInstanceStats(DashboardRequest request) {
ProcessInstanceStats stats = new ProcessInstanceStats();
// 获取时间范围
Date startTime = request.getStartTime();
Date endTime = request.getEndTime();
// 统计各状态的流程实例数量
stats.setTotalCount(processInstanceRepository.countByTimeRange(startTime, endTime));
stats.setRunningCount(processInstanceRepository.countByStatusAndTimeRange(
ProcessStatus.RUNNING, startTime, endTime));
stats.setCompletedCount(processInstanceRepository.countByStatusAndTimeRange(
ProcessStatus.COMPLETED, startTime, endTime));
stats.setTerminatedCount(processInstanceRepository.countByStatusAndTimeRange(
ProcessStatus.TERMINATED, startTime, endTime));
stats.setSuspendedCount(processInstanceRepository.countByStatusAndTimeRange(
ProcessStatus.SUSPENDED, startTime, endTime));
stats.setErrorCount(processInstanceRepository.countByStatusAndTimeRange(
ProcessStatus.ERROR, startTime, endTime));
// 计算完成率
if (stats.getTotalCount() > 0) {
stats.setCompletionRate((double) stats.getCompletedCount() / stats.getTotalCount() * 100);
}
return stats;
}
/**
* 获取任务积压统计
* @param request 请求参数
* @return 任务积压统计
*/
private TaskBacklogStats getTaskBacklogStats(DashboardRequest request) {
TaskBacklogStats stats = new TaskBacklogStats();
Date startTime = request.getStartTime();
Date endTime = request.getEndTime();
// 统计各类任务数量
stats.setTotalCount(taskRepository.countByTimeRange(startTime, endTime));
stats.setPendingCount(taskRepository.countByStatusAndTimeRange(
TaskStatus.PENDING, startTime, endTime));
stats.setAssignedCount(taskRepository.countByStatusAndTimeRange(
TaskStatus.ASSIGNED, startTime, endTime));
stats.setInProgressCount(taskRepository.countByStatusAndTimeRange(
TaskStatus.IN_PROGRESS, startTime, endTime));
stats.setCompletedCount(taskRepository.countByStatusAndTimeRange(
TaskStatus.COMPLETED, startTime, endTime));
stats.setOverdueCount(taskRepository.countOverdueTasks());
// 统计高优先级任务
stats.setHighPriorityCount(taskRepository.countByPriorityAndTimeRange(
TaskPriority.HIGH, startTime, endTime));
// 计算积压率
if (stats.getTotalCount() > 0) {
stats.setBacklogRate((double) (stats.getPendingCount() + stats.getAssignedCount() +
stats.getInProgressCount()) / stats.getTotalCount() * 100);
}
return stats;
}
}流程实例状态监控
流程实例状态监控是监控看板的核心功能之一,它能够帮助管理人员了解当前所有流程实例的执行状态。
// 流程实例状态监控组件
@Component
public class ProcessInstanceStatusMonitor {
@Autowired
private ProcessInstanceRepository processInstanceRepository;
@Autowired
private MetricsService metricsService;
/**
* 获取流程实例状态分布
* @param timeRange 时间范围
* @return 状态分布数据
*/
public ProcessInstanceStatusDistribution getStatusDistribution(TimeRange timeRange) {
ProcessInstanceStatusDistribution distribution = new ProcessInstanceStatusDistribution();
// 按状态分组统计
Map<ProcessStatus, Long> statusCounts = processInstanceRepository
.countByStatusGroupedByTimeRange(timeRange.getStartTime(), timeRange.getEndTime());
distribution.setStatusCounts(statusCounts);
// 计算各状态占比
long totalCount = statusCounts.values().stream().mapToLong(Long::longValue).sum();
if (totalCount > 0) {
Map<ProcessStatus, Double> statusPercentages = new HashMap<>();
for (Map.Entry<ProcessStatus, Long> entry : statusCounts.entrySet()) {
statusPercentages.put(entry.getKey(),
(double) entry.getValue() / totalCount * 100);
}
distribution.setStatusPercentages(statusPercentages);
}
// 记录指标
metricsService.recordProcessInstanceStatusMetrics(statusCounts);
return distribution;
}
/**
* 获取异常流程实例列表
* @param limit 限制数量
* @return 异常流程实例列表
*/
public List<ProcessInstance> getAbnormalProcessInstances(int limit) {
// 获取错误状态的流程实例
List<ProcessInstance> errorInstances = processInstanceRepository
.findByStatusOrderByStartTimeDesc(ProcessStatus.ERROR, limit / 2);
// 获取超时的流程实例
List<ProcessInstance> timeoutInstances = processInstanceRepository
.findTimeoutInstances(new Date(System.currentTimeMillis() - 24 * 3600000L), limit / 2);
// 合并并排序
List<ProcessInstance> abnormalInstances = new ArrayList<>();
abnormalInstances.addAll(errorInstances);
abnormalInstances.addAll(timeoutInstances);
// 按开始时间倒序排序
abnormalInstances.sort((p1, p2) -> p2.getStartTime().compareTo(p1.getStartTime()));
// 限制数量
if (abnormalInstances.size() > limit) {
abnormalInstances = abnormalInstances.subList(0, limit);
}
return abnormalInstances;
}
/**
* 获取流程实例执行趋势
* @param timeRange 时间范围
* @param interval 时间间隔(小时)
* @return 执行趋势数据
*/
public ProcessInstanceTrendData getExecutionTrend(TimeRange timeRange, int interval) {
ProcessInstanceTrendData trendData = new ProcessInstanceTrendData();
// 按时间间隔分组统计
List<TimeSeriesDataPoint> startedInstances = processInstanceRepository
.countStartedInstancesByTimeRange(timeRange.getStartTime(),
timeRange.getEndTime(), interval);
List<TimeSeriesDataPoint> completedInstances = processInstanceRepository
.countCompletedInstancesByTimeRange(timeRange.getStartTime(),
timeRange.getEndTime(), interval);
trendData.setStartedInstances(startedInstances);
trendData.setCompletedInstances(completedInstances);
return trendData;
}
}任务积压监控
任务积压监控帮助管理人员及时发现未处理或超期的任务,确保业务流程的顺畅进行。
// 任务积压监控组件
@Component
public class TaskBacklogMonitor {
@Autowired
private TaskRepository taskRepository;
@Autowired
private UserRepository userRepository;
@Autowired
private AlertService alertService;
/**
* 获取任务积压统计
* @param timeRange 时间范围
* @return 积压统计
*/
public TaskBacklogStatistics getBacklogStatistics(TimeRange timeRange) {
TaskBacklogStatistics statistics = new TaskBacklogStatistics();
// 统计未处理任务
statistics.setPendingTasks(taskRepository.countByStatusAndTimeRange(
TaskStatus.PENDING, timeRange.getStartTime(), timeRange.getEndTime()));
// 统计已分配但未开始的任务
statistics.setAssignedButNotStartedTasks(taskRepository.countByStatusAndTimeRange(
TaskStatus.ASSIGNED, timeRange.getStartTime(), timeRange.getEndTime()));
// 统计进行中的任务
statistics.setInProgressTasks(taskRepository.countByStatusAndTimeRange(
TaskStatus.IN_PROGRESS, timeRange.getStartTime(), timeRange.getEndTime()));
// 统计超期任务
statistics.setOverdueTasks(taskRepository.countOverdueTasks());
// 按优先级统计
Map<TaskPriority, Integer> priorityStats = new HashMap<>();
priorityStats.put(TaskPriority.HIGH, taskRepository.countByPriorityAndTimeRange(
TaskPriority.HIGH, timeRange.getStartTime(), timeRange.getEndTime()));
priorityStats.put(TaskPriority.MEDIUM, taskRepository.countByPriorityAndTimeRange(
TaskPriority.MEDIUM, timeRange.getStartTime(), timeRange.getEndTime()));
priorityStats.put(TaskPriority.LOW, taskRepository.countByPriorityAndTimeRange(
TaskPriority.LOW, timeRange.getStartTime(), timeRange.getEndTime()));
statistics.setPriorityStatistics(priorityStats);
// 按用户统计积压任务
List<UserTaskBacklog> userBacklogs = getUserTaskBacklogs(timeRange);
statistics.setUserBacklogs(userBacklogs);
return statistics;
}
/**
* 获取用户任务积压情况
* @param timeRange 时间范围
* @return 用户积压统计
*/
private List<UserTaskBacklog> getUserTaskBacklogs(TimeRange timeRange) {
List<UserTaskBacklog> userBacklogs = new ArrayList<>();
// 获取所有用户
List<User> users = userRepository.findAll();
for (User user : users) {
UserTaskBacklog userBacklog = new UserTaskBacklog();
userBacklog.setUserId(user.getId());
userBacklog.setUserName(user.getName());
// 统计该用户的积压任务
int pendingCount = taskRepository.countByAssigneeAndStatusAndTimeRange(
user.getId(), TaskStatus.PENDING, timeRange.getStartTime(), timeRange.getEndTime());
int assignedCount = taskRepository.countByAssigneeAndStatusAndTimeRange(
user.getId(), TaskStatus.ASSIGNED, timeRange.getStartTime(), timeRange.getEndTime());
int inProgressCount = taskRepository.countByAssigneeAndStatusAndTimeRange(
user.getId(), TaskStatus.IN_PROGRESS, timeRange.getStartTime(), timeRange.getEndTime());
int overdueCount = taskRepository.countOverdueTasksByAssignee(user.getId());
userBacklog.setPendingCount(pendingCount);
userBacklog.setAssignedCount(assignedCount);
userBacklog.setInProgressCount(inProgressCount);
userBacklog.setOverdueCount(overdueCount);
userBacklog.setTotalBacklogCount(pendingCount + assignedCount + inProgressCount);
userBacklogs.add(userBacklog);
}
// 按积压任务数量排序
userBacklogs.sort((u1, u2) -> u2.getTotalBacklogCount() - u1.getTotalBacklogCount());
return userBacklogs;
}
/**
* 检查任务积压预警
*/
public void checkBacklogAlerts() {
try {
// 检查总体积压任务数量
int totalBacklog = taskRepository.countBacklogTasks();
if (totalBacklog > 1000) { // 阈值可配置
Alert alert = new Alert();
alert.setType(AlertType.TASK_BACKLOG_HIGH);
alert.setLevel(AlertLevel.WARNING);
alert.setTitle("任务积压预警");
alert.setMessage("当前积压任务数量: " + totalBacklog + ",超过预警阈值1000");
alert.setTimestamp(new Date());
alertService.sendAlert(alert);
}
// 检查超期任务数量
int overdueTasks = taskRepository.countOverdueTasks();
if (overdueTasks > 100) { // 阈值可配置
Alert alert = new Alert();
alert.setType(AlertType.TASK_OVERDUE_HIGH);
alert.setLevel(AlertLevel.CRITICAL);
alert.setTitle("超期任务预警");
alert.setMessage("当前超期任务数量: " + overdueTasks + ",超过预警阈值100");
alert.setTimestamp(new Date());
alertService.sendAlert(alert);
}
// 检查用户积压情况
List<User> users = userRepository.findAll();
for (User user : users) {
int userBacklog = taskRepository.countBacklogTasksByAssignee(user.getId());
if (userBacklog > 50) { // 阈值可配置
Alert alert = new Alert();
alert.setType(AlertType.USER_BACKLOG_HIGH);
alert.setLevel(AlertLevel.WARNING);
alert.setTitle("用户任务积压预警");
alert.setMessage("用户 " + user.getName() + " 积压任务数量: " + userBacklog + ",超过预警阈值50");
alert.setTargetUser(user.getId());
alert.setTimestamp(new Date());
alertService.sendAlert(alert);
}
}
} catch (Exception e) {
log.error("检查任务积压预警时发生错误", e);
}
}
}吞吐量监控
吞吐量监控帮助评估BPM平台的处理能力,识别性能瓶颈。
// 吞吐量监控组件
@Component
public class ThroughputMonitor {
@Autowired
private ProcessInstanceRepository processInstanceRepository;
@Autowired
private TaskRepository taskRepository;
@Autowired
private PerformanceMetricsRepository performanceMetricsRepository;
/**
* 获取流程吞吐量统计
* @param timeRange 时间范围
* @return 吞吐量统计
*/
public ProcessThroughputStats getProcessThroughputStats(TimeRange timeRange) {
ProcessThroughputStats stats = new ProcessThroughputStats();
// 计算流程实例处理速度
long processCount = processInstanceRepository.countCompletedByTimeRange(
timeRange.getStartTime(), timeRange.getEndTime());
long timeRangeMillis = timeRange.getEndTime().getTime() - timeRange.getStartTime().getTime();
double hours = timeRangeMillis / 3600000.0;
if (hours > 0) {
stats.setProcessesPerHour(processCount / hours);
}
stats.setTotalProcessed(processCount);
stats.setTimeRangeHours(hours);
// 按流程定义统计
Map<String, Long> processDefinitionStats = processInstanceRepository
.countCompletedByProcessDefinitionAndTimeRange(
timeRange.getStartTime(), timeRange.getEndTime());
stats.setProcessDefinitionStats(processDefinitionStats);
return stats;
}
/**
* 获取任务吞吐量统计
* @param timeRange 时间范围
* @return 任务吞吐量统计
*/
public TaskThroughputStats getTaskThroughputStats(TimeRange timeRange) {
TaskThroughputStats stats = new TaskThroughputStats();
// 计算任务处理速度
long taskCount = taskRepository.countCompletedByTimeRange(
timeRange.getStartTime(), timeRange.getEndTime());
long timeRangeMillis = timeRange.getEndTime().getTime() - timeRange.getStartTime().getTime();
double hours = timeRangeMillis / 3600000.0;
if (hours > 0) {
stats.setTasksPerHour(taskCount / hours);
}
stats.setTotalProcessed(taskCount);
stats.setTimeRangeHours(hours);
// 按任务类型统计
Map<String, Long> taskTypeStats = taskRepository
.countCompletedByTaskTypeAndTimeRange(
timeRange.getStartTime(), timeRange.getEndTime());
stats.setTaskTypeStats(taskTypeStats);
// 按用户统计
Map<String, Long> userStats = taskRepository
.countCompletedByUserAndTimeRange(
timeRange.getStartTime(), timeRange.getEndTime());
stats.setUserStats(userStats);
return stats;
}
/**
* 获取处理时间统计
* @param timeRange 时间范围
* @return 处理时间统计
*/
public ProcessingTimeStats getProcessingTimeStats(TimeRange timeRange) {
ProcessingTimeStats stats = new ProcessingTimeStats();
// 获取流程实例处理时间
List<Long> processDurations = processInstanceRepository
.getCompletedProcessDurations(timeRange.getStartTime(), timeRange.getEndTime());
if (!processDurations.isEmpty()) {
stats.setAverageProcessDuration(calculateAverage(processDurations));
stats.setMedianProcessDuration(calculateMedian(processDurations));
stats.setMaxProcessDuration(Collections.max(processDurations));
stats.setMinProcessDuration(Collections.min(processDurations));
}
// 获取任务处理时间
List<Long> taskDurations = taskRepository
.getCompletedTaskDurations(timeRange.getStartTime(), timeRange.getEndTime());
if (!taskDurations.isEmpty()) {
stats.setAverageTaskDuration(calculateAverage(taskDurations));
stats.setMedianTaskDuration(calculateMedian(taskDurations));
stats.setMaxTaskDuration(Collections.max(taskDurations));
stats.setMinTaskDuration(Collections.min(taskDurations));
}
return stats;
}
/**
* 计算平均值
* @param values 值列表
* @return 平均值
*/
private double calculateAverage(List<Long> values) {
return values.stream().mapToLong(Long::longValue).average().orElse(0.0);
}
/**
* 计算中位数
* @param values 值列表
* @return 中位数
*/
private long calculateMedian(List<Long> values) {
Collections.sort(values);
int size = values.size();
if (size == 0) return 0;
if (size % 2 == 1) {
return values.get(size / 2);
} else {
return (values.get(size / 2 - 1) + values.get(size / 2)) / 2;
}
}
}效率瓶颈识别
效率瓶颈识别帮助发现流程执行中的性能问题,为优化提供方向。
// 效率瓶颈识别组件
@Component
public class BottleneckDetector {
@Autowired
private ProcessInstanceRepository processInstanceRepository;
@Autowired
private TaskRepository taskRepository;
@Autowired
private ActivityLogRepository activityLogRepository;
/**
* 分析流程瓶颈
* @param analysisRequest 分析请求
* @return 瓶颈分析结果
*/
public BottleneckAnalysisResult analyzeBottlenecks(BottleneckAnalysisRequest analysisRequest) {
BottleneckAnalysisResult result = new BottleneckAnalysisResult();
result.setAnalysisTime(new Date());
try {
// 分析流程实例级别的瓶颈
List<ProcessBottleneck> processBottlenecks = analyzeProcessBottlenecks(analysisRequest);
result.setProcessBottlenecks(processBottlenecks);
// 分析任务级别的瓶颈
List<TaskBottleneck> taskBottlenecks = analyzeTaskBottlenecks(analysisRequest);
result.setTaskBottlenecks(taskBottlenecks);
// 分析活动级别的瓶颈
List<ActivityBottleneck> activityBottlenecks = analyzeActivityBottlenecks(analysisRequest);
result.setActivityBottlenecks(activityBottlenecks);
// 识别最严重的瓶颈
result.setTopBottlenecks(identifyTopBottlenecks(result));
} catch (Exception e) {
log.error("分析瓶颈时发生错误", e);
result.setError("分析失败: " + e.getMessage());
}
return result;
}
/**
* 分析流程实例级别的瓶颈
* @param request 分析请求
* @return 流程瓶颈列表
*/
private List<ProcessBottleneck> analyzeProcessBottlenecks(BottleneckAnalysisRequest request) {
List<ProcessBottleneck> bottlenecks = new ArrayList<>();
// 获取执行时间较长的流程实例
List<ProcessInstance> slowProcesses = processInstanceRepository
.findSlowProcesses(request.getTimeRange().getStartTime(),
request.getTimeRange().getEndTime(), request.getSlowThreshold());
for (ProcessInstance process : slowProcesses) {
ProcessBottleneck bottleneck = new ProcessBottleneck();
bottleneck.setProcessInstanceId(process.getId());
bottleneck.setProcessDefinitionKey(process.getProcessDefinitionKey());
bottleneck.setDuration(process.getDuration());
bottleneck.setStartTime(process.getStartTime());
bottleneck.setEndTime(process.getEndTime());
// 分析流程中的慢活动
List<ActivityLog> activityLogs = activityLogRepository
.findByProcessInstanceId(process.getId());
List<SlowActivity> slowActivities = new ArrayList<>();
for (ActivityLog log : activityLogs) {
if (log.getDuration() > request.getActivitySlowThreshold()) {
SlowActivity slowActivity = new SlowActivity();
slowActivity.setActivityId(log.getActivityId());
slowActivity.setActivityName(log.getActivityName());
slowActivity.setDuration(log.getDuration());
slowActivity.setStartTime(log.getStartTime());
slowActivity.setEndTime(log.getEndTime());
slowActivities.add(slowActivity);
}
}
bottleneck.setSlowActivities(slowActivities);
bottleneck.setSlowActivityCount(slowActivities.size());
bottlenecks.add(bottleneck);
}
// 按持续时间排序
bottlenecks.sort((b1, b2) -> Long.compare(b2.getDuration(), b1.getDuration()));
return bottlenecks;
}
/**
* 分析任务级别的瓶颈
* @param request 分析请求
* @return 任务瓶颈列表
*/
private List<TaskBottleneck> analyzeTaskBottlenecks(BottleneckAnalysisRequest request) {
List<TaskBottleneck> bottlenecks = new ArrayList<>();
// 获取处理时间较长的任务
List<Task> slowTasks = taskRepository
.findSlowTasks(request.getTimeRange().getStartTime(),
request.getTimeRange().getEndTime(), request.getTaskSlowThreshold());
for (Task task : slowTasks) {
TaskBottleneck bottleneck = new TaskBottleneck();
bottleneck.setTaskId(task.getId());
bottleneck.setTaskName(task.getTitle());
bottleneck.setAssignee(task.getAssignee());
bottleneck.setDuration(task.getDuration());
bottleneck.setCreateTime(task.getCreateTime());
bottleneck.setCompletionTime(task.getCompletionTime());
bottlenecks.add(bottleneck);
}
// 按持续时间排序
bottlenecks.sort((b1, b2) -> Long.compare(b2.getDuration(), b1.getDuration()));
return bottlenecks;
}
/**
* 识别最严重的瓶颈
* @param analysisResult 分析结果
* @return 最严重的瓶颈列表
*/
private List<Bottleneck> identifyTopBottlenecks(BottleneckAnalysisResult analysisResult) {
List<Bottleneck> topBottlenecks = new ArrayList<>();
// 添加流程瓶颈
for (ProcessBottleneck processBottleneck : analysisResult.getProcessBottlenecks()) {
if (processBottleneck.getSlowActivityCount() > 0) {
Bottleneck bottleneck = new Bottleneck();
bottleneck.setType(BottleneckType.PROCESS);
bottleneck.setSeverity(calculateProcessBottleneckSeverity(processBottleneck));
bottleneck.setDescription("流程 " + processBottleneck.getProcessDefinitionKey() +
" 执行时间过长,包含 " + processBottleneck.getSlowActivityCount() + " 个慢活动");
bottleneck.setDetails(processBottleneck);
topBottlenecks.add(bottleneck);
}
}
// 添加任务瓶颈
for (TaskBottleneck taskBottleneck : analysisResult.getTaskBottlenecks()) {
Bottleneck bottleneck = new Bottleneck();
bottleneck.setType(BottleneckType.TASK);
bottleneck.setSeverity(calculateTaskBottleneckSeverity(taskBottleneck));
bottleneck.setDescription("任务 " + taskBottleneck.getTaskName() + " 处理时间过长");
bottleneck.setDetails(taskBottleneck);
topBottlenecks.add(bottleneck);
}
// 按严重程度排序
topBottlenecks.sort((b1, b2) -> Integer.compare(b2.getSeverity(), b1.getSeverity()));
// 只返回前10个最严重的瓶颈
if (topBottlenecks.size() > 10) {
topBottlenecks = topBottlenecks.subList(0, 10);
}
return topBottlenecks;
}
/**
* 计算流程瓶颈严重程度
* @param bottleneck 流程瓶颈
* @return 严重程度(1-10)
*/
private int calculateProcessBottleneckSeverity(ProcessBottleneck bottleneck) {
// 基于持续时间和慢活动数量计算严重程度
int severity = 1;
// 持续时间超过阈值的倍数
long threshold = 3600000; // 1小时
if (bottleneck.getDuration() > threshold * 10) {
severity += 6;
} else if (bottleneck.getDuration() > threshold * 5) {
severity += 4;
} else if (bottleneck.getDuration() > threshold * 2) {
severity += 2;
}
// 慢活动数量
if (bottleneck.getSlowActivityCount() > 10) {
severity += 3;
} else if (bottleneck.getSlowActivityCount() > 5) {
severity += 2;
} else if (bottleneck.getSlowActivityCount() > 0) {
severity += 1;
}
return Math.min(severity, 10);
}
/**
* 计算任务瓶颈严重程度
* @param bottleneck 任务瓶颈
* @return 严重程度(1-10)
*/
private int calculateTaskBottleneckSeverity(TaskBottleneck bottleneck) {
// 基于持续时间计算严重程度
int severity = 1;
// 持续时间超过阈值的倍数
long threshold = 1800000; // 30分钟
if (bottleneck.getDuration() > threshold * 10) {
severity += 6;
} else if (bottleneck.getDuration() > threshold * 5) {
severity += 4;
} else if (bottleneck.getDuration() > threshold * 2) {
severity += 2;
}
return Math.min(severity, 10);
}
}监控看板前端实现
监控看板不仅需要强大的后端支持,还需要直观易用的前端界面来展示数据。
// 监控看板前端组件 (React示例)
import React, { useState, useEffect } from 'react';
import { LineChart, BarChart, PieChart } from 'react-chartjs-2';
import {
Card,
CardContent,
Grid,
Typography,
CircularProgress,
Alert
} from '@mui/material';
const MonitoringDashboard = () => {
const [monitoringData, setMonitoringData] = useState(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState(null);
useEffect(() => {
// 定时获取监控数据
const fetchData = async () => {
try {
setLoading(true);
const response = await fetch('/api/monitoring/dashboard', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
dashboardId: 'main-dashboard',
timeRange: {
startTime: new Date(Date.now() - 24 * 3600000),
endTime: new Date()
}
})
});
if (!response.ok) {
throw new Error('获取监控数据失败');
}
const data = await response.json();
setMonitoringData(data);
setError(null);
} catch (err) {
setError(err.message);
} finally {
setLoading(false);
}
};
// 初始获取数据
fetchData();
// 每30秒刷新一次数据
const interval = setInterval(fetchData, 30000);
return () => clearInterval(interval);
}, []);
if (loading) {
return (
<div style={{ display: 'flex', justifyContent: 'center', alignItems: 'center', height: '100vh' }}>
<CircularProgress />
</div>
);
}
if (error) {
return (
<div style={{ padding: '20px' }}>
<Alert severity="error">{error}</Alert>
</div>
);
}
// 流程实例状态图表数据
const processStatusData = {
labels: ['运行中', '已完成', '已终止', '已暂停', '错误'],
datasets: [{
data: [
monitoringData.processInstanceStats.runningCount,
monitoringData.processInstanceStats.completedCount,
monitoringData.processInstanceStats.terminatedCount,
monitoringData.processInstanceStats.suspendedCount,
monitoringData.processInstanceStats.errorCount
],
backgroundColor: [
'#4CAF50',
'#2196F3',
'#FF9800',
'#FFEB3B',
'#F44336'
]
}]
};
// 任务积压图表数据
const taskBacklogData = {
labels: ['待处理', '已分配', '进行中', '已完成', '超期'],
datasets: [{
label: '任务数量',
data: [
monitoringData.taskBacklogStats.pendingCount,
monitoringData.taskBacklogStats.assignedCount,
monitoringData.taskBacklogStats.inProgressCount,
monitoringData.taskBacklogStats.completedCount,
monitoringData.taskBacklogStats.overdueCount
],
backgroundColor: [
'#FF5722',
'#FF9800',
'#FFEB3B',
'#4CAF50',
'#F44336'
]
}]
};
return (
<div style={{ padding: '20px' }}>
<Typography variant="h4" gutterBottom>
BPM实时监控看板
</Typography>
<Grid container spacing={3}>
{/* 流程实例状态卡片 */}
<Grid item xs={12} md={6} lg={4}>
<Card>
<CardContent>
<Typography variant="h6" gutterBottom>
流程实例状态
</Typography>
<PieChart data={processStatusData} />
<Typography variant="body2" style={{ marginTop: '10px' }}>
总计: {monitoringData.processInstanceStats.totalCount} 个流程实例
</Typography>
<Typography variant="body2">
完成率: {monitoringData.processInstanceStats.completionRate.toFixed(2)}%
</Typography>
</CardContent>
</Card>
</Grid>
{/* 任务积压卡片 */}
<Grid item xs={12} md={6} lg={4}>
<Card>
<CardContent>
<Typography variant="h6" gutterBottom>
任务积压情况
</Typography>
<BarChart data={taskBacklogData} />
<Typography variant="body2" style={{ marginTop: '10px' }}>
总计: {monitoringData.taskBacklogStats.totalCount} 个任务
</Typography>
<Typography variant="body2">
积压率: {monitoringData.taskBacklogStats.backlogRate.toFixed(2)}%
</Typography>
</CardContent>
</Card>
</Grid>
{/* 吞吐量卡片 */}
<Grid item xs={12} md={6} lg={4}>
<Card>
<CardContent>
<Typography variant="h6" gutterBottom>
处理吞吐量
</Typography>
<Typography variant="body1">
流程处理速度: {monitoringData.throughputStats.processesPerHour.toFixed(2)} 个/小时
</Typography>
<Typography variant="body1">
任务处理速度: {monitoringData.throughputStats.tasksPerHour.toFixed(2)} 个/小时
</Typography>
<Typography variant="body1">
平均流程处理时间: {(monitoringData.throughputStats.averageProcessDuration / 60000).toFixed(2)} 分钟
</Typography>
<Typography variant="body1">
平均任务处理时间: {(monitoringData.throughputStats.averageTaskDuration / 60000).toFixed(2)} 分钟
</Typography>
</CardContent>
</Card>
</Grid>
{/* 效率瓶颈卡片 */}
<Grid item xs={12}>
<Card>
<CardContent>
<Typography variant="h6" gutterBottom>
效率瓶颈分析
</Typography>
{monitoringData.bottleneckAnalysis.topBottlenecks.length > 0 ? (
<ul>
{monitoringData.bottleneckAnalysis.topBottlenecks.map((bottleneck, index) => (
<li key={index}>
<Typography variant="body1">
<span style={{ color: getSeverityColor(bottleneck.severity) }}>
[严重程度: {bottleneck.severity}/10]
</span>{' '}
{bottleneck.description}
</Typography>
</li>
))}
</ul>
) : (
<Typography variant="body1" color="textSecondary">
当前未发现明显瓶颈
</Typography>
)}
</CardContent>
</Card>
</Grid>
</Grid>
</div>
);
};
// 根据严重程度获取颜色
const getSeverityColor = (severity) => {
if (severity >= 8) return '#F44336'; // 红色
if (severity >= 5) return '#FF9800'; // 橙色
return '#4CAF50'; // 绿色
};
export default MonitoringDashboard;最佳实践与注意事项
在实现实时监控看板时,需要注意以下最佳实践:
1. 性能优化
- 合理使用缓存机制,避免频繁查询数据库
- 对于大数据量的统计,考虑使用预计算或异步计算
- 优化前端渲染性能,避免频繁的DOM操作
2. 数据准确性
- 确保监控数据的实时性和准确性
- 建立数据校验机制,防止异常数据影响监控结果
- 定期清理过期的监控数据
3. 用户体验
- 提供直观、易懂的可视化界面
- 支持自定义监控指标和时间范围
- 提供预警和通知功能
4. 可扩展性
- 设计模块化的架构,便于功能扩展
- 支持多种数据源的集成
- 提供开放的API接口
通过合理设计和实现实时监控看板,可以显著提升BPM平台的可观察性和可管理性,为平台的稳定运行和持续优化提供有力保障。
