Junjie
2026-04-27 a69f9ef3839bbf759480a836f46a20809ccec402
fix: prevent running auto tune jobs on apply failure
2个文件已修改
101 ■■■■■ 已修改文件
src/main/java/com/zy/ai/service/impl/AutoTuneApplyServiceImpl.java 30 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/test/java/com/zy/ai/service/AutoTuneApplyServiceImplTest.java 71 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/zy/ai/service/impl/AutoTuneApplyServiceImpl.java
@@ -26,6 +26,8 @@
import com.zy.core.enums.RedisKeyType;
import com.zy.system.entity.Config;
import com.zy.system.service.ConfigService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.PlatformTransactionManager;
@@ -42,10 +44,11 @@
@Service("autoTuneApplyService")
public class AutoTuneApplyServiceImpl implements AutoTuneApplyService {
    private static final Logger LOGGER = LoggerFactory.getLogger(AutoTuneApplyServiceImpl.class);
    private static final String PROMPT_SCENE_CODE = "auto_tune_apply";
    private static final String DIRECTION_OUT = "OUT";
    private static final long APPLY_LOCK_SECONDS = 120L;
    private static final String APPLY_LOCK_BUSY_REASON = "AI自动调参正在执行,请稍后重试";
    private static final String APPLY_LOCK_BUSY_REASON = "申请调参锁失败,锁不可用,可能已有任务或 Redis 异常";
    @Autowired
    private AiAutoTuneJobService aiAutoTuneJobService;
@@ -72,7 +75,6 @@
        boolean dryRun = Boolean.TRUE.equals(safeRequest.getDryRun());
        Date now = new Date();
        AiAutoTuneJob job = createJob(safeRequest, dryRun, now);
        aiAutoTuneJobService.save(job);
        if (dryRun) {
            return applyDryRun(safeRequest, job, now);
@@ -109,7 +111,7 @@
        String lockKey = RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key;
        String lockToken = UUID.randomUUID().toString();
        if (!redisUtil.trySetStringIfAbsent(lockKey, lockToken, APPLY_LOCK_SECONDS)) {
            return rejectRealApplyForBusyLock(request, job, now);
            return rejectRealApplyForUnavailableLock(request, job, now, lockKey);
        }
        try {
@@ -143,22 +145,29 @@
                return buildResult(job, persistenceResult.getAuditChanges(), false);
            } catch (RuntimeException exception) {
                markWriteFailure(validatedChanges, exception);
                Date failureNow = new Date();
                AiAutoTuneJob failureJob = createJob(request, false, failureNow);
                ApplyPersistenceResult persistenceResult = persistApplyResultInTransaction(
                        job,
                        failureJob,
                        request,
                        validatedChanges,
                        false,
                        now,
                        failureNow,
                        false
                );
                return buildResult(job, persistenceResult.getAuditChanges(), false);
                return buildResult(failureJob, persistenceResult.getAuditChanges(), false);
            }
        } finally {
            redisUtil.compareAndDelete(lockKey, lockToken);
        }
    }
    private AutoTuneApplyResult rejectRealApplyForBusyLock(AutoTuneApplyRequest request, AiAutoTuneJob job, Date now) {
    private AutoTuneApplyResult rejectRealApplyForUnavailableLock(AutoTuneApplyRequest request,
                                                                  AiAutoTuneJob job,
                                                                  Date now,
                                                                  String lockKey) {
        boolean lockKeyExists = redisUtil.hasKey(lockKey);
        LOGGER.warn("申请AI自动调参 apply 锁失败,lockKey={}, lockKeyExists={}", lockKey, lockKeyExists);
        List<ValidatedChange> validatedChanges = buildLockBusyChanges(request);
        ApplyPersistenceResult persistenceResult = persistApplyResultInTransaction(
                job,
@@ -397,6 +406,7 @@
                                                     boolean dryRun,
                                                     Date now,
                                                     boolean writeTargets) {
        saveJob(job);
        boolean refreshConfigCache = false;
        if (writeTargets) {
            refreshConfigCache = applyValidatedChanges(validatedChanges);
@@ -408,6 +418,12 @@
        return new ApplyPersistenceResult(auditChanges, refreshConfigCache);
    }
    private void saveJob(AiAutoTuneJob job) {
        if (!aiAutoTuneJobService.save(job)) {
            throw new IllegalStateException("保存调参任务失败");
        }
    }
    private boolean applyValidatedChanges(List<ValidatedChange> validatedChanges) {
        boolean refreshConfigCache = false;
        for (ValidatedChange validatedChange : validatedChanges) {
src/test/java/com/zy/ai/service/AutoTuneApplyServiceImplTest.java
@@ -93,11 +93,15 @@
        AtomicLong jobId = new AtomicLong(100);
        when(aiAutoTuneJobService.save(any(AiAutoTuneJob.class))).thenAnswer(invocation -> {
            transactionManager.recordJobSaveCall();
            AiAutoTuneJob job = invocation.getArgument(0);
            job.setId(jobId.incrementAndGet());
            return true;
        });
        when(aiAutoTuneJobService.updateById(any(AiAutoTuneJob.class))).thenReturn(true);
        when(aiAutoTuneJobService.updateById(any(AiAutoTuneJob.class))).thenAnswer(invocation -> {
            transactionManager.recordJobUpdateCall();
            return true;
        });
        when(aiAutoTuneChangeService.saveBatch(any(Collection.class))).thenReturn(true);
        when(aiAutoTuneChangeService.list(any(Wrapper.class))).thenReturn(Collections.emptyList());
        when(configService.getConfigValue(eq("aiAutoTuneIntervalMinutes"), any())).thenReturn("10");
@@ -321,6 +325,10 @@
        assertEquals("failed", changes.get(0).getResultStatus());
        assertTrue(changes.get(0).getRejectReason().contains("db write failed"));
        assertEquals(1, transactionManager.getRollbackCount());
        assertTrue(transactionManager.getJobSaveInsideTransactionCount() > 0);
        assertTrue(transactionManager.getJobUpdateInsideTransactionCount() > 0);
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
        verify(redisUtil).compareAndDelete(eq(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key), anyString());
    }
@@ -340,6 +348,10 @@
        assertEquals("failed", changes.get(0).getResultStatus());
        assertTrue(changes.get(0).getRejectReason().contains("audit failed"));
        assertEquals(1, transactionManager.getRollbackCount());
        assertTrue(transactionManager.getJobSaveInsideTransactionCount() > 0);
        assertTrue(transactionManager.getJobUpdateInsideTransactionCount() > 0);
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
        verify(configService).saveConfigValue("conveyorStationTaskLimit", "15");
        verify(configService, never()).refreshSystemConfigCache();
        verify(redisUtil).compareAndDelete(eq(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key), anyString());
@@ -348,7 +360,10 @@
    @Test
    void jobUpdateFailureRollsBackTargetWriteTransaction() {
        when(configService.getOne(any(Wrapper.class))).thenReturn(config("conveyorStationTaskLimit", "10"));
        when(aiAutoTuneJobService.updateById(any(AiAutoTuneJob.class))).thenReturn(false);
        when(aiAutoTuneJobService.updateById(any(AiAutoTuneJob.class))).thenAnswer(invocation -> {
            transactionManager.recordJobUpdateCall();
            return false;
        });
        IllegalStateException exception = assertThrows(IllegalStateException.class,
                () -> service.apply(request(false, command("sys_config", null, "conveyorStationTaskLimit", "15"))));
@@ -356,6 +371,10 @@
        assertTrue(exception.getMessage().contains("更新调参任务状态失败"));
        assertEquals(2, transactionManager.getRollbackCount());
        assertEquals(0, transactionManager.getCommitCount());
        assertTrue(transactionManager.getJobSaveInsideTransactionCount() > 0);
        assertTrue(transactionManager.getJobUpdateInsideTransactionCount() > 0);
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
        verify(configService).saveConfigValue("conveyorStationTaskLimit", "15");
        verify(configService, never()).refreshSystemConfigCache();
        verify(redisUtil).compareAndDelete(eq(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key), anyString());
@@ -372,9 +391,15 @@
        assertFalse(result.getSuccess());
        assertEquals("failed", updatedJob.getStatus());
        assertEquals("failed", changes.get(0).getResultStatus());
        assertTrue(changes.get(0).getRejectReason().contains("正在执行"));
        assertTrue(changes.get(0).getRejectReason().contains("锁不可用"));
        assertTrue(changes.get(0).getRejectReason().contains("Redis"));
        assertTrue(transactionManager.getJobSaveInsideTransactionCount() > 0);
        assertTrue(transactionManager.getJobUpdateInsideTransactionCount() > 0);
        assertEquals(0, transactionManager.getJobSaveOutsideTransactionCount());
        assertEquals(0, transactionManager.getJobUpdateOutsideTransactionCount());
        verify(configService, never()).getOne(any(Wrapper.class));
        verify(configService, never()).saveConfigValue(any(), any());
        verify(redisUtil).hasKey(RedisKeyType.AI_AUTO_TUNE_APPLY_LOCK.key);
        verify(redisUtil, never()).compareAndDelete(anyString(), anyString());
    }
@@ -526,21 +551,45 @@
        // Number of getTransaction(...) calls observed.
        private int beginCount;
        // Number of commit(...) calls observed.
        private int commitCount;
        // Number of rollback(...) calls observed.
        private int rollbackCount;
        // True between getTransaction(...) and the following commit(...)/rollback(...).
        private boolean transactionActive;
        // Job save calls recorded while transactionActive was true.
        private int jobSaveInsideTransactionCount;
        // Job save calls recorded while transactionActive was false.
        private int jobSaveOutsideTransactionCount;
        // Job update calls recorded while transactionActive was true.
        private int jobUpdateInsideTransactionCount;
        // Job update calls recorded while transactionActive was false.
        private int jobUpdateOutsideTransactionCount;
        @Override
        public TransactionStatus getTransaction(TransactionDefinition definition) {
            beginCount++;
            transactionActive = true;
            return new SimpleTransactionStatus();
        }
        @Override
        public void commit(TransactionStatus status) {
            commitCount++;
            transactionActive = false;
        }
        @Override
        public void rollback(TransactionStatus status) {
            rollbackCount++;
            transactionActive = false;
        }
        public void recordJobSaveCall() {
            if (transactionActive) {
                jobSaveInsideTransactionCount++;
            } else {
                jobSaveOutsideTransactionCount++;
            }
        }
        public void recordJobUpdateCall() {
            if (transactionActive) {
                jobUpdateInsideTransactionCount++;
            } else {
                jobUpdateOutsideTransactionCount++;
            }
        }
        public int getCommitCount() {
@@ -550,5 +599,21 @@
        public int getRollbackCount() {
            return rollbackCount;
        }
        public int getJobSaveInsideTransactionCount() {
            return jobSaveInsideTransactionCount;
        }
        public int getJobSaveOutsideTransactionCount() {
            return jobSaveOutsideTransactionCount;
        }
        public int getJobUpdateInsideTransactionCount() {
            return jobUpdateInsideTransactionCount;
        }
        public int getJobUpdateOutsideTransactionCount() {
            return jobUpdateOutsideTransactionCount;
        }
    }
}