Junjie
2026-04-27 892e45141a55bfea0e28c6a34a384f2ce8ee7d16
fix: make auto tune rollback auditable atomically
2个文件已修改
230 ■■■■ 已修改文件
src/main/java/com/zy/ai/service/impl/AutoTuneApplyServiceImpl.java 138 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/test/java/com/zy/ai/service/AutoTuneApplyServiceImplTest.java 92 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/zy/ai/service/impl/AutoTuneApplyServiceImpl.java
@@ -197,30 +197,58 @@
    public AutoTuneApplyResult rollbackLastSuccessfulJob(String reason) {
        Date now = new Date();
        AiAutoTuneJob rollbackJob = createRollbackJob(reason, now);
        aiAutoTuneJobService.save(rollbackJob);
        List<AiAutoTuneChange> sourceChanges = findLatestSuccessfulChanges();
        if (sourceChanges.isEmpty()) {
            rollbackJob.setStatus(AutoTuneJobStatus.REJECTED.getCode());
            rollbackJob.setFinishTime(now);
            rollbackJob.setRejectCount(0);
            rollbackJob.setSuccessCount(0);
            rollbackJob.setSummary("未找到可回滚的成功调参记录");
            rollbackJob.setErrorMessage("未找到可回滚的成功调参记录");
            aiAutoTuneJobService.updateById(rollbackJob);
            persistNoRollbackSourceJobInTransaction(rollbackJob, now);
            return buildResult(rollbackJob, new ArrayList<>(), false);
        }
        List<AiAutoTuneChange> rollbackChanges = new ArrayList<>();
        try {
            rollbackChanges = rollbackChangesInTransaction(rollbackJob.getId(), sourceChanges, now);
        } catch (RuntimeException exception) {
            rollbackChanges = buildFailedRollbackChanges(rollbackJob.getId(), sourceChanges, exception, now);
        String lockKey = RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key;
        String lockToken = UUID.randomUUID().toString();
        if (!redisUtil.trySetStringIfAbsent(lockKey, lockToken, APPLY_LOCK_SECONDS)) {
            return rejectRollbackForUnavailableLock(reason, sourceChanges, now, lockKey);
        }
        aiAutoTuneChangeService.saveBatch(rollbackChanges);
        finishRollbackJob(rollbackJob, rollbackChanges, now);
        aiAutoTuneJobService.updateById(rollbackJob);
        return buildResult(rollbackJob, rollbackChanges, false);
        try {
            try {
                RollbackPersistenceResult persistenceResult = persistRollbackResultInTransaction(
                        rollbackJob,
                        sourceChanges,
                        now
                );
                refreshRollbackConfigCacheIfNeeded(persistenceResult);
                return buildResult(rollbackJob, persistenceResult.getRollbackChanges(), false);
            } catch (RuntimeException exception) {
                Date failureNow = new Date();
                AiAutoTuneJob failureJob = createRollbackJob(reason, failureNow);
                RollbackPersistenceResult persistenceResult = persistFailedRollbackResultInTransaction(
                        failureJob,
                        sourceChanges,
                        exception,
                        failureNow
                );
                return buildResult(failureJob, persistenceResult.getRollbackChanges(), false);
            }
        } finally {
            redisUtil.compareAndDelete(lockKey, lockToken);
        }
    }
    private AutoTuneApplyResult rejectRollbackForUnavailableLock(String reason,
                                                                 List<AiAutoTuneChange> sourceChanges,
                                                                 Date now,
                                                                 String lockKey) {
        boolean lockKeyExists = redisUtil.hasKey(lockKey);
        LOGGER.warn("申请AI自动调参 rollback 锁失败,lockKey={}, lockKeyExists={}", lockKey, lockKeyExists);
        AiAutoTuneJob rollbackJob = createRollbackJob(reason, now);
        RollbackPersistenceResult persistenceResult = persistFailedRollbackResultInTransaction(
                rollbackJob,
                sourceChanges,
                new IllegalStateException(APPLY_LOCK_BUSY_REASON),
                now
        );
        return buildResult(rollbackJob, persistenceResult.getRollbackChanges(), false);
    }
    private List<ValidatedChange> validateChanges(AutoTuneApplyRequest request, boolean dryRun, Date now) {
@@ -466,14 +494,55 @@
        }
    }
    private List<AiAutoTuneChange> rollbackChangesInTransaction(Long rollbackJobId,
                                                                List<AiAutoTuneChange> sourceChanges,
                                                                Date now) {
    private void persistNoRollbackSourceJobInTransaction(AiAutoTuneJob rollbackJob, Date now) {
        TransactionTemplate transactionTemplate = new TransactionTemplate(transactionManager);
        return transactionTemplate.execute(status -> rollbackChanges(rollbackJobId, sourceChanges, now));
        transactionTemplate.executeWithoutResult(status -> {
            saveJob(rollbackJob);
            rollbackJob.setStatus(AutoTuneJobStatus.REJECTED.getCode());
            rollbackJob.setFinishTime(now);
            rollbackJob.setRejectCount(0);
            rollbackJob.setSuccessCount(0);
            rollbackJob.setSummary("未找到可回滚的成功调参记录");
            rollbackJob.setErrorMessage("未找到可回滚的成功调参记录");
            updateJob(rollbackJob);
        });
    }
    private List<AiAutoTuneChange> rollbackChanges(Long rollbackJobId, List<AiAutoTuneChange> sourceChanges, Date now) {
    private RollbackPersistenceResult persistRollbackResultInTransaction(AiAutoTuneJob rollbackJob,
                                                                        List<AiAutoTuneChange> sourceChanges,
                                                                        Date now) {
        TransactionTemplate transactionTemplate = new TransactionTemplate(transactionManager);
        return transactionTemplate.execute(status -> {
            saveJob(rollbackJob);
            RollbackPersistenceResult rollbackResult = rollbackChanges(rollbackJob.getId(), sourceChanges, now);
            saveAuditChanges(rollbackResult.getRollbackChanges());
            finishRollbackJob(rollbackJob, rollbackResult.getRollbackChanges(), now);
            updateJob(rollbackJob);
            return rollbackResult;
        });
    }
    private RollbackPersistenceResult persistFailedRollbackResultInTransaction(AiAutoTuneJob rollbackJob,
                                                                              List<AiAutoTuneChange> sourceChanges,
                                                                              RuntimeException exception,
                                                                              Date now) {
        TransactionTemplate transactionTemplate = new TransactionTemplate(transactionManager);
        return transactionTemplate.execute(status -> {
            saveJob(rollbackJob);
            List<AiAutoTuneChange> rollbackChanges = buildFailedRollbackChanges(
                    rollbackJob.getId(),
                    sourceChanges,
                    exception,
                    now
            );
            saveAuditChanges(rollbackChanges);
            finishRollbackJob(rollbackJob, rollbackChanges, now);
            updateJob(rollbackJob);
            return new RollbackPersistenceResult(rollbackChanges, false);
        });
    }
    private RollbackPersistenceResult rollbackChanges(Long rollbackJobId, List<AiAutoTuneChange> sourceChanges, Date now) {
        List<AiAutoTuneChange> rollbackChanges = new ArrayList<>();
        boolean refreshConfigCache = false;
        for (AiAutoTuneChange sourceChange : sourceChanges) {
@@ -492,10 +561,13 @@
            rollbackChange.setResultStatus(ChangeStatus.SUCCESS.getCode());
            rollbackChanges.add(rollbackChange);
        }
        if (refreshConfigCache) {
        return new RollbackPersistenceResult(rollbackChanges, refreshConfigCache);
    }
    private void refreshRollbackConfigCacheIfNeeded(RollbackPersistenceResult persistenceResult) {
        if (persistenceResult != null && persistenceResult.isRefreshConfigCache()) {
            configService.refreshSystemConfigCache();
        }
        return rollbackChanges;
    }
    private void writeValue(String targetType, String targetId, String targetKey, String value) {
@@ -936,6 +1008,24 @@
        }
    }
    private static class RollbackPersistenceResult {
        private final List<AiAutoTuneChange> rollbackChanges;
        private final boolean refreshConfigCache;
        private RollbackPersistenceResult(List<AiAutoTuneChange> rollbackChanges, boolean refreshConfigCache) {
            this.rollbackChanges = rollbackChanges == null ? new ArrayList<>() : rollbackChanges;
            this.refreshConfigCache = refreshConfigCache;
        }
        public List<AiAutoTuneChange> getRollbackChanges() {
            return rollbackChanges;
        }
        public boolean isRefreshConfigCache() {
            return refreshConfigCache;
        }
    }
    private static class ValidatedChange {
        private final AutoTuneChangeCommand command;
        private final String targetType;
src/test/java/com/zy/ai/service/AutoTuneApplyServiceImplTest.java
@@ -427,6 +427,98 @@
        verify(configService).saveConfigValue("conveyorStationTaskLimit", "10");
        verify(configService).refreshSystemConfigCache();
        verify(basStationService).update(any(Wrapper.class));
        verify(redisUtil).compareAndDelete(eq(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key), anyString());
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
    }
    @Test
    void rollbackAuditSaveBatchFailureRollsBackTargetWriteAndReturnsFailedAudit() {
        AiAutoTuneJob latestRealJob = job(10L, "manual", "success");
        AiAutoTuneChange configChange = successChange(10L, "sys_config", "", "conveyorStationTaskLimit", "10", "15");
        when(aiAutoTuneChangeService.list(any(Wrapper.class)))
                .thenReturn(List.of(configChange))
                .thenReturn(List.of(configChange));
        when(aiAutoTuneJobService.getById(10L)).thenReturn(latestRealJob);
        when(configService.getOne(any(Wrapper.class))).thenReturn(config("conveyorStationTaskLimit", "15"));
        when(aiAutoTuneChangeService.saveBatch(any(Collection.class)))
                .thenThrow(new IllegalStateException("rollback audit failed"))
                .thenReturn(true);
        AutoTuneApplyResult result = service.rollbackLastSuccessfulJob("manual rollback");
        List<AiAutoTuneChange> changes = savedChanges();
        AiAutoTuneJob updatedJob = updatedJob();
        assertFalse(result.getSuccess());
        assertEquals("failed", updatedJob.getStatus());
        assertEquals("failed", changes.get(0).getResultStatus());
        assertTrue(changes.get(0).getRejectReason().contains("rollback audit failed"));
        assertEquals(1, transactionManager.getRollbackCount());
        assertTrue(transactionManager.getJobSaveInsideTransactionCount() > 0);
        assertTrue(transactionManager.getJobUpdateInsideTransactionCount() > 0);
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
        verify(configService).saveConfigValue("conveyorStationTaskLimit", "10");
        verify(configService, never()).refreshSystemConfigCache();
        verify(redisUtil).compareAndDelete(eq(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key), anyString());
    }
    @Test
    void rollbackJobUpdateFailureRollsBackAndDoesNotSaveRunningJobOutsideTransaction() {
        AiAutoTuneJob latestRealJob = job(10L, "manual", "success");
        AiAutoTuneChange configChange = successChange(10L, "sys_config", "", "conveyorStationTaskLimit", "10", "15");
        when(aiAutoTuneChangeService.list(any(Wrapper.class)))
                .thenReturn(List.of(configChange))
                .thenReturn(List.of(configChange));
        when(aiAutoTuneJobService.getById(10L)).thenReturn(latestRealJob);
        when(configService.getOne(any(Wrapper.class))).thenReturn(config("conveyorStationTaskLimit", "15"));
        when(aiAutoTuneJobService.updateById(any(AiAutoTuneJob.class))).thenAnswer(invocation -> {
            transactionManager.recordJobUpdateCall();
            return false;
        });
        IllegalStateException exception = assertThrows(IllegalStateException.class,
                () -> service.rollbackLastSuccessfulJob("manual rollback"));
        assertTrue(exception.getMessage().contains("更新调参任务状态失败"));
        assertEquals(2, transactionManager.getRollbackCount());
        assertEquals(0, transactionManager.getCommitCount());
        assertTrue(transactionManager.getJobSaveInsideTransactionCount() > 0);
        assertTrue(transactionManager.getJobUpdateInsideTransactionCount() > 0);
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
        verify(configService).saveConfigValue("conveyorStationTaskLimit", "10");
        verify(configService, never()).refreshSystemConfigCache();
        verify(redisUtil).compareAndDelete(eq(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key), anyString());
    }
    @Test
    void rollbackLockNotAcquiredReturnsFailedAuditWithoutTargetWrite() {
        AiAutoTuneJob latestRealJob = job(10L, "manual", "success");
        AiAutoTuneChange configChange = successChange(10L, "sys_config", "", "conveyorStationTaskLimit", "10", "15");
        when(aiAutoTuneChangeService.list(any(Wrapper.class)))
                .thenReturn(List.of(configChange))
                .thenReturn(List.of(configChange));
        when(aiAutoTuneJobService.getById(10L)).thenReturn(latestRealJob);
        when(redisUtil.trySetStringIfAbsent(anyString(), anyString(), anyLong())).thenReturn(false);
        AutoTuneApplyResult result = service.rollbackLastSuccessfulJob("manual rollback");
        List<AiAutoTuneChange> changes = savedChanges();
        AiAutoTuneJob updatedJob = updatedJob();
        assertFalse(result.getSuccess());
        assertEquals("failed", updatedJob.getStatus());
        assertEquals("failed", changes.get(0).getResultStatus());
        assertTrue(changes.get(0).getRejectReason().contains("锁不可用"));
        assertTrue(changes.get(0).getRejectReason().contains("Redis"));
        assertTrue(transactionManager.getJobSaveInsideTransactionCount() > 0);
        assertTrue(transactionManager.getJobUpdateInsideTransactionCount() > 0);
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
        verify(configService, never()).saveConfigValue(any(), any());
        verify(configService, never()).refreshSystemConfigCache();
        verify(redisUtil).hasKey(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key);
        verify(redisUtil, never()).compareAndDelete(anyString(), anyString());
    }
    @Test