src/main/java/com/zy/ai/config/LlmConfig.java
File was deleted src/main/java/com/zy/ai/controller/LlmRouteConfigController.java
New file @@ -0,0 +1,131 @@
package com.zy.ai.controller;

import com.baomidou.mybatisplus.mapper.EntityWrapper;
import com.core.annotations.ManagerAuth;
import com.core.common.R;
import com.zy.ai.entity.LlmRouteConfig;
import com.zy.ai.service.LlmRouteConfigService;
import com.zy.ai.service.LlmRoutingService;
import com.zy.common.web.BaseController;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.*;

import java.util.Date;
import java.util.List;
import java.util.Map;

/**
 * Management endpoints for LLM route configurations (CRUD, cooldown reset, connectivity test).
 *
 * <p>Every mutating endpoint calls {@link LlmRoutingService#evictCache()} afterwards so the
 * routing layer's short-lived route cache picks up the change immediately.
 *
 * <p>NOTE(review): {@code list()} returns entities including {@code apiKey} in plaintext to the
 * frontend — confirm this is acceptable for the admin console, or mask before returning.
 */
@RestController
@RequestMapping("/ai/llm/config")
@RequiredArgsConstructor
public class LlmRouteConfigController extends BaseController {

    private final LlmRouteConfigService llmRouteConfigService;
    private final LlmRoutingService llmRoutingService;

    /**
     * Lists all route configurations ordered by ascending priority (lower number first),
     * then by id for a stable order among equal priorities.
     */
    @GetMapping("/list/auth")
    @ManagerAuth
    public R list() {
        EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
        wrapper.orderBy("priority", true).orderBy("id", true);
        List<LlmRouteConfig> list = llmRouteConfigService.selectList(wrapper);
        return R.ok(list);
    }

    /**
     * Creates (id == null) or updates (id != null) a route configuration.
     *
     * <p>baseUrl / apiKey / model are mandatory. On update, runtime statistics fields and
     * createTime are copied back from the database row so a stale frontend payload cannot
     * overwrite them.
     *
     * @param config the route configuration from the request body
     * @return the saved configuration, or an error result on validation failure
     */
    @PostMapping("/save/auth")
    @ManagerAuth
    public R save(@RequestBody LlmRouteConfig config) {
        if (config == null) {
            return R.error("参数不能为空");
        }
        if (isBlank(config.getBaseUrl()) || isBlank(config.getApiKey()) || isBlank(config.getModel())) {
            return R.error("必须填写 baseUrl/apiKey/model");
        }
        if (config.getId() == null) {
            // Create path: fill defaults (priority, status, counters, timestamps) then insert.
            llmRoutingService.fillAndNormalize(config, true);
            llmRouteConfigService.insert(config);
        } else {
            LlmRouteConfig db = llmRouteConfigService.selectById(config.getId());
            if (db == null) {
                return R.error("配置不存在");
            }
            // Preserve statistics fields so the frontend cannot accidentally overwrite them.
            // NOTE(review): cooldownUntil is NOT preserved here — a stale payload could clear
            // or alter an active cooldown; confirm whether that is intended.
            Integer failCount = db.getFailCount();
            Integer successCount = db.getSuccessCount();
            Integer consecutiveFailCount = db.getConsecutiveFailCount();
            Date lastFailTime = db.getLastFailTime();
            Date lastUsedTime = db.getLastUsedTime();
            String lastError = db.getLastError();
            llmRoutingService.fillAndNormalize(config, false);
            config.setFailCount(failCount);
            config.setSuccessCount(successCount);
            config.setConsecutiveFailCount(consecutiveFailCount);
            config.setLastFailTime(lastFailTime);
            config.setLastUsedTime(lastUsedTime);
            config.setLastError(lastError);
            // Keep the original creation timestamp; fillAndNormalize only refreshes updateTime.
            config.setCreateTime(db.getCreateTime());
            llmRouteConfigService.updateById(config);
        }
        llmRoutingService.evictCache();
        return R.ok(config);
    }

    /**
     * Deletes a route configuration by id and evicts the routing cache.
     * Deleting a non-existent id is silently treated as success.
     */
    @PostMapping("/delete/auth")
    @ManagerAuth
    public R delete(@RequestParam("id") Long id) {
        if (id == null) {
            return R.error("id不能为空");
        }
        llmRouteConfigService.deleteById(id);
        llmRoutingService.evictCache();
        return R.ok();
    }

    /**
     * Manually clears a route's cooldown window and consecutive-failure counter,
     * making it immediately eligible for selection again.
     */
    @PostMapping("/clearCooldown/auth")
    @ManagerAuth
    public R clearCooldown(@RequestParam("id") Long id) {
        if (id == null) {
            return R.error("id不能为空");
        }
        LlmRouteConfig cfg = llmRouteConfigService.selectById(id);
        if (cfg == null) {
            return R.error("配置不存在");
        }
        cfg.setCooldownUntil(null);
        cfg.setConsecutiveFailCount(0);
        cfg.setUpdateTime(new Date());
        llmRouteConfigService.updateById(cfg);
        llmRoutingService.evictCache();
        return R.ok();
    }

    /**
     * Tests connectivity of a route (the posted config, not necessarily a saved one).
     *
     * <p>If the test succeeds ("ok" == true in the result map) and the config refers to an
     * existing saved route, that route's cooldown and consecutive-failure counter are reset
     * as a side effect, since a successful probe proves the route is healthy again.
     *
     * @return the raw test result map from {@link LlmRoutingService#testRoute}
     */
    @PostMapping("/test/auth")
    @ManagerAuth
    public R test(@RequestBody LlmRouteConfig config) {
        if (config == null) {
            return R.error("参数不能为空");
        }
        if (isBlank(config.getBaseUrl()) || isBlank(config.getApiKey()) || isBlank(config.getModel())) {
            return R.error("测试失败:必须填写 baseUrl/apiKey/model");
        }
        Map<String, Object> data = llmRoutingService.testRoute(config);
        if (Boolean.TRUE.equals(data.get("ok")) && config.getId() != null) {
            LlmRouteConfig db = llmRouteConfigService.selectById(config.getId());
            if (db != null) {
                db.setCooldownUntil(null);
                db.setConsecutiveFailCount(0);
                db.setUpdateTime(new Date());
                llmRouteConfigService.updateById(db);
                llmRoutingService.evictCache();
            }
        }
        return R.ok(data);
    }

    /** Returns true when the string is null, empty, or whitespace-only. */
    private boolean isBlank(String s) {
        return s == null || s.trim().isEmpty();
    }
}
src/main/java/com/zy/ai/controller/WcsDiagnosisController.java
@@ -33,7 +33,7 @@ WcsDiagnosisRequest request = aiUtils.makeAiRequest(1000, "对当前系统进行巡检,如果有异常情况就进行详细的分析,如果没有异常情况则当成一次检查\n\n"); wcsDiagnosisService.diagnoseStream(request, emitter); } catch (Exception e) { emitter.completeWithError(e); try { emitter.complete(); } catch (Exception ignore) {} } }).start(); @@ -50,7 +50,7 @@ WcsDiagnosisRequest request = aiUtils.makeAiRequest(100, null); wcsDiagnosisService.askStream(request, prompt, chatId, reset, emitter); } catch (Exception e) { emitter.completeWithError(e); try { emitter.complete(); } catch (Exception ignore) {} } }).start(); return emitter; src/main/java/com/zy/ai/entity/LlmRouteConfig.java
New file @@ -0,0 +1,249 @@
package com.zy.ai.entity;

import com.baomidou.mybatisplus.annotations.TableField;
import com.baomidou.mybatisplus.annotations.TableId;
import com.baomidou.mybatisplus.annotations.TableName;
import com.baomidou.mybatisplus.enums.IdType;

import java.io.Serializable;
import java.util.Date;

/**
 * Persistent configuration of one LLM route (endpoint + credentials + failover policy),
 * mapped to table {@code sys_llm_route} via MyBatis-Plus.
 *
 * <p>Besides the static configuration (baseUrl/apiKey/model, priority, failover switches),
 * the row also carries runtime health statistics (success/failure counters, cooldown window,
 * last error/usage timestamps) that the routing service updates as routes are used.
 */
@TableName("sys_llm_route")
public class LlmRouteConfig implements Serializable {

    private static final long serialVersionUID = 1L;

    // Auto-increment primary key.
    @TableId(value = "id", type = IdType.AUTO)
    private Long id;

    // Human-readable route name.
    private String name;

    // Base URL of the LLM endpoint.
    @TableField("base_url")
    private String baseUrl;

    // Bearer token used to authenticate against the endpoint.
    @TableField("api_key")
    private String apiKey;

    // Model identifier sent in chat-completion requests.
    private String model;

    /**
     * 1 = enable deep-thinking mode, 0 = disabled.
     */
    private Short thinking;

    /**
     * Route priority; a smaller number means a higher priority.
     */
    private Integer priority;

    /**
     * 1 = enabled, 0 = disabled.
     */
    private Short status;

    // 1 = allow switching to the next route when quota is exhausted.
    @TableField("switch_on_quota")
    private Short switchOnQuota;

    // 1 = allow switching to the next route on generic errors.
    @TableField("switch_on_error")
    private Short switchOnError;

    // Cooldown duration (seconds) applied after a failure that triggers cooldown.
    @TableField("cooldown_seconds")
    private Integer cooldownSeconds;

    // Route is skipped until this instant; null means not cooling down.
    @TableField("cooldown_until")
    private Date cooldownUntil;

    // Total failure count (runtime statistic).
    @TableField("fail_count")
    private Integer failCount;

    // Total success count (runtime statistic).
    @TableField("success_count")
    private Integer successCount;

    // Consecutive failures since the last success (runtime statistic).
    @TableField("consecutive_fail_count")
    private Integer consecutiveFailCount;

    // Truncated text of the most recent error.
    @TableField("last_error")
    private String lastError;

    // Timestamp of the last successful use.
    @TableField("last_used_time")
    private Date lastUsedTime;

    // Timestamp of the last failure.
    @TableField("last_fail_time")
    private Date lastFailTime;

    @TableField("create_time")
    private Date createTime;

    @TableField("update_time")
    private Date updateTime;

    // Free-form remark.
    private String memo;

    public Long getId() { return id; }
    public void setId(Long id) { this.id = id; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public String getBaseUrl() { return baseUrl; }
    public void setBaseUrl(String baseUrl) { this.baseUrl = baseUrl; }

    public String getApiKey() { return apiKey; }
    public void setApiKey(String apiKey) { this.apiKey = apiKey; }

    public String getModel() { return model; }
    public void setModel(String model) { this.model = model; }

    public Short getThinking() { return thinking; }
    public void setThinking(Short thinking) { this.thinking = thinking; }

    public Integer getPriority() { return priority; }
    public void setPriority(Integer priority) { this.priority = priority; }

    public Short getStatus() { return status; }
    public void setStatus(Short status) { this.status = status; }

    public Short getSwitchOnQuota() { return switchOnQuota; }
    public void setSwitchOnQuota(Short switchOnQuota) { this.switchOnQuota = switchOnQuota; }

    public Short getSwitchOnError() { return switchOnError; }
    public void setSwitchOnError(Short switchOnError) { this.switchOnError = switchOnError; }

    public Integer getCooldownSeconds() { return cooldownSeconds; }
    public void setCooldownSeconds(Integer cooldownSeconds) { this.cooldownSeconds = cooldownSeconds; }

    public Date getCooldownUntil() { return cooldownUntil; }
    public void setCooldownUntil(Date cooldownUntil) { this.cooldownUntil = cooldownUntil; }

    public Integer getFailCount() { return failCount; }
    public void setFailCount(Integer failCount) { this.failCount = failCount; }

    public Integer getSuccessCount() { return successCount; }
    public void setSuccessCount(Integer successCount) { this.successCount = successCount; }

    public Integer getConsecutiveFailCount() { return consecutiveFailCount; }
    public void setConsecutiveFailCount(Integer consecutiveFailCount) { this.consecutiveFailCount = consecutiveFailCount; }

    public String getLastError() { return lastError; }
    public void setLastError(String lastError) { this.lastError = lastError; }

    public Date getLastUsedTime() { return lastUsedTime; }
    public void setLastUsedTime(Date lastUsedTime) { this.lastUsedTime = lastUsedTime; }

    public Date getLastFailTime() { return lastFailTime; }
    public void setLastFailTime(Date lastFailTime) { this.lastFailTime = lastFailTime; }

    public Date getCreateTime() { return createTime; }
    public void setCreateTime(Date createTime) { this.createTime = createTime; }

    public Date getUpdateTime() { return updateTime; }
    public void setUpdateTime(Date updateTime) { this.updateTime = updateTime; }

    public String getMemo() { return memo; }
    public void setMemo(String memo) { this.memo = memo; }
}
src/main/java/com/zy/ai/mapper/LlmRouteConfigMapper.java
New file @@ -0,0 +1,11 @@ package com.zy.ai.mapper; import com.baomidou.mybatisplus.mapper.BaseMapper; import com.zy.ai.entity.LlmRouteConfig; import org.apache.ibatis.annotations.Mapper; import org.springframework.stereotype.Repository; @Mapper @Repository public interface LlmRouteConfigMapper extends BaseMapper<LlmRouteConfig> { } src/main/java/com/zy/ai/service/LlmChatService.java
@@ -1,7 +1,11 @@ package com.zy.ai.service; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.zy.ai.entity.ChatCompletionRequest; import com.zy.ai.entity.ChatCompletionResponse; import com.zy.ai.entity.LlmRouteConfig; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; @@ -9,37 +13,33 @@ import org.springframework.http.MediaType; import org.springframework.stereotype.Service; import org.springframework.web.reactive.function.client.WebClient; import reactor.core.publisher.Mono; import reactor.core.publisher.Flux; import java.util.HashMap; import java.util.ArrayList; import java.util.List; import java.util.function.Consumer; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import java.util.function.Consumer; @Slf4j @Service @RequiredArgsConstructor public class LlmChatService { private final WebClient llmWebClient; private final LlmRoutingService llmRoutingService; @Value("${llm.api-key}") private String apiKey; @Value("${llm.base-url:}") private String fallbackBaseUrl; @Value("${llm.model}") private String model; @Value("${llm.api-key:}") private String fallbackApiKey; @Value("${llm.pythonPlatformUrl}") private String pythonPlatformUrl; @Value("${llm.model:}") private String fallbackModel; @Value("${llm.thinking}") private String thinking; @Value("${llm.thinking:false}") private String fallbackThinking; /** * 通用对话方法:传入 messages,返回大模型文本回复 @@ -49,27 +49,12 @@ Integer maxTokens) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? 
maxTokens : 1024); req.setStream(false); ChatCompletionResponse response = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToMono(resp -> resp.bodyToFlux(String.class) .collectList() .map(list -> { String payload = String.join("\n\n", list); return parseCompletion(payload); })) .doOnError(ex -> log.error("调用 LLM 失败", ex)) .onErrorResume(ex -> Mono.empty()) .block(); ChatCompletionResponse response = complete(req); if (response == null || response.getChoices() == null || @@ -88,17 +73,10 @@ Integer maxTokens, List<Object> tools) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? maxTokens : 1024); req.setStream(false); if(thinking.equals("enable")) { ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking(); thinking.setType("enable"); req.setThinking(thinking); } if (tools != null && !tools.isEmpty()) { req.setTools(tools); req.setTool_choice("auto"); @@ -107,26 +85,42 @@ } public ChatCompletionResponse complete(ChatCompletionRequest req) { try { return llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToMono(resp -> resp.bodyToFlux(String.class) .collectList() .map(list -> { String payload = String.join("\n\n", list); return parseCompletion(payload); })) .doOnError(ex -> log.error("调用 LLM 失败", ex)) .onErrorResume(ex -> Mono.empty()) .block(); } catch (Exception e) { log.error("调用 LLM 失败", e); List<ResolvedRoute> routes = resolveRoutes(); if (routes.isEmpty()) { log.error("调用 LLM 失败: 未配置可用 LLM 路由"); return null; } Throwable last = 
null; for (int i = 0; i < routes.size(); i++) { ResolvedRoute route = routes.get(i); boolean hasNext = i < routes.size() - 1; try { ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false); ChatCompletionResponse resp = callCompletion(route, routeReq); if (!isValidCompletion(resp)) { throw new RuntimeException("LLM 响应为空"); } markSuccess(route); return resp; } catch (Throwable ex) { last = ex; boolean quota = isQuotaExhausted(ex); boolean canSwitch = shouldSwitch(route, quota); markFailure(route, ex, canSwitch); if (hasNext && canSwitch) { log.warn("LLM 切换到下一路由, current={}, reason={}", route.tag(), errorText(ex)); continue; } log.error("调用 LLM 失败, route={}", route.tag(), ex); break; } } if (last != null) { log.error("调用 LLM 全部路由失败: {}", errorText(last)); } return null; } public void chatStream(List<ChatCompletionRequest.Message> messages, @@ -137,92 +131,12 @@ Consumer<Throwable> onError) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? 
maxTokens : 1024); req.setStream(true); Flux<String> flux = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .retrieve() .bodyToFlux(String.class) .doOnError(ex -> log.error("调用 LLM 流式失败", ex)); AtomicBoolean doneSeen = new AtomicBoolean(false); AtomicBoolean errorSeen = new AtomicBoolean(false); LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>(); Thread drain = new Thread(() -> { try { while (true) { String s = queue.poll(2, TimeUnit.SECONDS); if (s != null) { try { onChunk.accept(s); } catch (Exception ignore) {} } if (doneSeen.get() && queue.isEmpty()) { if (!errorSeen.get()) { try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {} } break; } } } catch (InterruptedException ignore) { ignore.printStackTrace(); } }); drain.setDaemon(true); drain.start(); flux.subscribe(payload -> { if (payload == null || payload.isEmpty()) return; String[] events = payload.split("\\r?\\n\\r?\\n"); for (String part : events) { String s = part; if (s == null || s.isEmpty()) continue; if (s.startsWith("data:")) { s = s.substring(5); if (s.startsWith(" ")) s = s.substring(1); } if ("[DONE]".equals(s.trim())) { doneSeen.set(true); continue; } try { JSONObject obj = JSON.parseObject(s); JSONArray choices = obj.getJSONArray("choices"); if (choices != null && !choices.isEmpty()) { JSONObject c0 = choices.getJSONObject(0); JSONObject delta = c0.getJSONObject("delta"); if (delta != null) { String content = delta.getString("content"); if (content != null) { try { queue.offer(content); } catch (Exception ignore) {} } } } } catch (Exception e) { e.printStackTrace(); } } }, err -> { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(err); }, () -> { if (!doneSeen.get()) { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成")); } else 
{ doneSeen.set(true); } }); streamWithFailover(req, onChunk, onComplete, onError); } public void chatStreamWithTools(List<ChatCompletionRequest.Message> messages, @@ -233,120 +147,46 @@ Runnable onComplete, Consumer<Throwable> onError) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? maxTokens : 1024); req.setStream(true); if(thinking.equals("enable")) { ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking(); thinking.setType("enable"); req.setThinking(thinking); } if (tools != null && !tools.isEmpty()) { req.setTools(tools); req.setTool_choice("auto"); } Flux<String> flux = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .retrieve() .bodyToFlux(String.class) .doOnError(ex -> log.error("调用 LLM 流式失败", ex)); AtomicBoolean doneSeen = new AtomicBoolean(false); AtomicBoolean errorSeen = new AtomicBoolean(false); LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>(); Thread drain = new Thread(() -> { try { while (true) { String s = queue.poll(5, TimeUnit.SECONDS); if (s != null) { try { onChunk.accept(s); } catch (Exception ignore) {} } if (doneSeen.get() && queue.isEmpty()) { if (!errorSeen.get()) { try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {} } break; } } } catch (InterruptedException ignore) { ignore.printStackTrace(); } }); drain.setDaemon(true); drain.start(); flux.subscribe(payload -> { if (payload == null || payload.isEmpty()) return; String[] events = payload.split("\\r?\\n\\r?\\n"); for (String part : events) { String s = part; if (s == null || s.isEmpty()) continue; if (s.startsWith("data:")) { s = s.substring(5); if (s.startsWith(" ")) s = s.substring(1); } if ("[DONE]".equals(s.trim())) { 
doneSeen.set(true); continue; } try { JSONObject obj = JSON.parseObject(s); JSONArray choices = obj.getJSONArray("choices"); if (choices != null && !choices.isEmpty()) { JSONObject c0 = choices.getJSONObject(0); JSONObject delta = c0.getJSONObject("delta"); if (delta != null) { String content = delta.getString("content"); if (content != null) { try { queue.offer(content); } catch (Exception ignore) {} } } } } catch (Exception e) { e.printStackTrace(); } } }, err -> { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(err); }, () -> { if (!doneSeen.get()) { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成")); } else { doneSeen.set(true); } }); streamWithFailover(req, onChunk, onComplete, onError); } public void chatStreamRunPython(String prompt, String chatId, Consumer<String> onChunk, private void streamWithFailover(ChatCompletionRequest req, Consumer<String> onChunk, Runnable onComplete, Consumer<Throwable> onError) { HashMap<String, Object> req = new HashMap<>(); req.put("prompt", prompt); req.put("chatId", chatId); List<ResolvedRoute> routes = resolveRoutes(); if (routes.isEmpty()) { if (onError != null) onError.accept(new RuntimeException("未配置可用 LLM 路由")); return; } attemptStream(routes, 0, req, onChunk, onComplete, onError); } Flux<String> flux = llmWebClient.post() .uri(pythonPlatformUrl) .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .retrieve() .bodyToFlux(String.class) .doOnError(ex -> log.error("调用 LLM 流式失败", ex)); private void attemptStream(List<ResolvedRoute> routes, int index, ChatCompletionRequest req, Consumer<String> onChunk, Runnable onComplete, Consumer<Throwable> onError) { if (index >= routes.size()) { if (onError != null) onError.accept(new RuntimeException("LLM 路由全部失败")); return; } ResolvedRoute route = routes.get(index); ChatCompletionRequest routeReq = 
applyRoute(cloneRequest(req), route, true); AtomicBoolean doneSeen = new AtomicBoolean(false); AtomicBoolean errorSeen = new AtomicBoolean(false); AtomicBoolean emitted = new AtomicBoolean(false); LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>(); Thread drain = new Thread(() -> { @@ -354,6 +194,7 @@ while (true) { String s = queue.poll(2, TimeUnit.SECONDS); if (s != null) { emitted.set(true); try { onChunk.accept(s); } catch (Exception ignore) { @@ -370,13 +211,12 @@ } } } catch (InterruptedException ignore) { ignore.printStackTrace(); } }); drain.setDaemon(true); drain.start(); flux.subscribe(payload -> { streamFlux(route, routeReq).subscribe(payload -> { if (payload == null || payload.isEmpty()) return; String[] events = payload.split("\\r?\\n\\r?\\n"); for (String part : events) { @@ -390,10 +230,6 @@ doneSeen.set(true); continue; } if("<think>".equals(s.trim()) || "</think>".equals(s.trim())) { queue.offer(s.trim()); continue; } try { JSONObject obj = JSON.parseObject(s); JSONArray choices = obj.getJSONArray("choices"); @@ -403,30 +239,190 @@ if (delta != null) { String content = delta.getString("content"); if (content != null) { try { queue.offer(content); } catch (Exception ignore) { } queue.offer(content); } } } } catch (Exception e) { e.printStackTrace(); log.warn("解析 LLM stream 片段失败: {}", e.getMessage()); } } }, err -> { errorSeen.set(true); doneSeen.set(true); boolean quota = isQuotaExhausted(err); boolean canSwitch = shouldSwitch(route, quota); markFailure(route, err, canSwitch); if (!emitted.get() && canSwitch && index < routes.size() - 1) { log.warn("LLM 路由失败,自动切换,current={}, reason={}", route.tag(), errorText(err)); attemptStream(routes, index + 1, req, onChunk, onComplete, onError); return; } if (onError != null) onError.accept(err); }, () -> { if (!doneSeen.get()) { RuntimeException ex = new RuntimeException("LLM 流意外完成"); errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成")); 
boolean canSwitch = shouldSwitch(route, false); markFailure(route, ex, canSwitch); if (!emitted.get() && canSwitch && index < routes.size() - 1) { log.warn("LLM 路由流异常完成,自动切换,current={}", route.tag()); attemptStream(routes, index + 1, req, onChunk, onComplete, onError); } else { if (onError != null) onError.accept(ex); } } else { markSuccess(route); doneSeen.set(true); } }); } private Flux<String> streamFlux(ResolvedRoute route, ChatCompletionRequest req) { WebClient client = WebClient.builder().baseUrl(route.baseUrl).build(); return client.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToFlux(resp -> { int status = resp.rawStatusCode(); if (status >= 200 && status < 300) { return resp.bodyToFlux(String.class); } return resp.bodyToMono(String.class) .defaultIfEmpty("") .flatMapMany(body -> Flux.error(new LlmRouteException(status, body))); }) .doOnError(ex -> log.error("调用 LLM 流式失败, route={}", route.tag(), ex)); } private ChatCompletionResponse callCompletion(ResolvedRoute route, ChatCompletionRequest req) { WebClient client = WebClient.builder().baseUrl(route.baseUrl).build(); RawCompletionResult raw = client.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToMono(resp -> resp.bodyToFlux(String.class) .collectList() .map(list -> new RawCompletionResult(resp.rawStatusCode(), String.join("\\n\\n", list)))) .block(); if (raw == null) { throw new RuntimeException("LLM 返回为空"); } if (raw.statusCode < 200 || raw.statusCode >= 300) { throw new LlmRouteException(raw.statusCode, raw.payload); } return parseCompletion(raw.payload); } private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) { 
req.setModel(route.model); req.setStream(stream); if (route.thinkingEnabled) { ChatCompletionRequest.Thinking t = new ChatCompletionRequest.Thinking(); t.setType("enable"); req.setThinking(t); } else { req.setThinking(null); } return req; } private ChatCompletionRequest cloneRequest(ChatCompletionRequest src) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(src.getModel()); req.setMessages(src.getMessages()); req.setTemperature(src.getTemperature()); req.setMax_tokens(src.getMax_tokens()); req.setStream(src.getStream()); req.setTools(src.getTools()); req.setTool_choice(src.getTool_choice()); req.setThinking(src.getThinking()); return req; } private boolean isValidCompletion(ChatCompletionResponse response) { if (response == null || response.getChoices() == null || response.getChoices().isEmpty()) { return false; } ChatCompletionRequest.Message message = response.getChoices().get(0).getMessage(); if (message == null) { return false; } if (!isBlank(message.getContent())) { return true; } return message.getTool_calls() != null && !message.getTool_calls().isEmpty(); } private boolean shouldSwitch(ResolvedRoute route, boolean quota) { return quota ? route.switchOnQuota : route.switchOnError; } private void markSuccess(ResolvedRoute route) { if (route.id != null) { llmRoutingService.markSuccess(route.id); } } private void markFailure(ResolvedRoute route, Throwable ex, boolean enterCooldown) { if (route.id != null) { llmRoutingService.markFailure(route.id, errorText(ex), enterCooldown, route.cooldownSeconds); } } private String errorText(Throwable ex) { if (ex == null) return "unknown"; if (ex instanceof LlmRouteException) { LlmRouteException e = (LlmRouteException) ex; String body = e.body == null ? "" : e.body; if (body.length() > 240) { body = body.substring(0, 240); } return "status=" + e.statusCode + ", body=" + body; } return ex.getMessage() == null ? 
ex.toString() : ex.getMessage(); } private boolean isQuotaExhausted(Throwable ex) { if (!(ex instanceof LlmRouteException)) return false; LlmRouteException e = (LlmRouteException) ex; if (e.statusCode == 429) return true; String text = (e.body == null ? "" : e.body).toLowerCase(); return text.contains("insufficient_quota") || text.contains("quota") || text.contains("余额") || text.contains("用量") || text.contains("超限") || text.contains("rate limit"); } private List<ResolvedRoute> resolveRoutes() { List<ResolvedRoute> routes = new ArrayList<>(); List<LlmRouteConfig> dbRoutes = llmRoutingService.listAvailableRoutes(); for (LlmRouteConfig c : dbRoutes) { routes.add(ResolvedRoute.fromDb(c)); } // 兼容:数据库为空时,回退到 yml if (routes.isEmpty() && !isBlank(fallbackBaseUrl) && !isBlank(fallbackApiKey) && !isBlank(fallbackModel)) { routes.add(ResolvedRoute.fromFallback(fallbackBaseUrl, fallbackApiKey, fallbackModel, isFallbackThinkingEnabled())); } return routes; } private boolean isFallbackThinkingEnabled() { String x = fallbackThinking == null ? 
"" : fallbackThinking.trim().toLowerCase(); return "true".equals(x) || "1".equals(x) || "enable".equals(x); } private boolean isBlank(String s) { return s == null || s.trim().isEmpty(); } private ChatCompletionResponse mergeSseChunk(ChatCompletionResponse acc, String payload) { @@ -452,7 +448,7 @@ ChatCompletionResponse.Choice choice = new ChatCompletionResponse.Choice(); ChatCompletionRequest.Message msg = new ChatCompletionRequest.Message(); choice.setMessage(msg); java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>(); ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>(); list.add(choice); acc.setChoices(list); } @@ -490,7 +486,8 @@ if (created != null) acc.setCreated(created); String object = obj.getString("object"); if (object != null && !object.isEmpty()) acc.setObjectName(object); } catch (Exception ignore) {} } catch (Exception ignore) { } } return acc; } @@ -502,7 +499,8 @@ if (r != null && r.getChoices() != null && !r.getChoices().isEmpty() && r.getChoices().get(0).getMessage() != null) { return r; } } catch (Exception ignore) {} } catch (Exception ignore) { } ChatCompletionResponse sse = mergeSseChunk(new ChatCompletionResponse(), payload); if (sse.getChoices() != null && !sse.getChoices().isEmpty() && sse.getChoices().get(0).getMessage() != null && sse.getChoices().get(0).getMessage().getContent() != null) { return sse; @@ -513,9 +511,75 @@ msg.setRole("assistant"); msg.setContent(payload); choice.setMessage(msg); java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>(); ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>(); list.add(choice); r.setChoices(list); return r; } private static class RawCompletionResult { private final int statusCode; private final String payload; private RawCompletionResult(int statusCode, String payload) { this.statusCode = statusCode; this.payload = payload; } } private static class LlmRouteException extends RuntimeException { private 
final int statusCode; private final String body; private LlmRouteException(int statusCode, String body) { super("http status=" + statusCode); this.statusCode = statusCode; this.body = body; } } private static class ResolvedRoute { private Long id; private String name; private String baseUrl; private String apiKey; private String model; private boolean thinkingEnabled; private boolean switchOnQuota; private boolean switchOnError; private Integer cooldownSeconds; private static ResolvedRoute fromDb(LlmRouteConfig c) { ResolvedRoute r = new ResolvedRoute(); r.id = c.getId(); r.name = c.getName(); r.baseUrl = c.getBaseUrl(); r.apiKey = c.getApiKey(); r.model = c.getModel(); r.thinkingEnabled = c.getThinking() != null && c.getThinking() == 1; r.switchOnQuota = c.getSwitchOnQuota() == null || c.getSwitchOnQuota() == 1; r.switchOnError = c.getSwitchOnError() == null || c.getSwitchOnError() == 1; r.cooldownSeconds = c.getCooldownSeconds(); return r; } private static ResolvedRoute fromFallback(String baseUrl, String apiKey, String model, boolean thinkingEnabled) { ResolvedRoute r = new ResolvedRoute(); r.name = "fallback-yml"; r.baseUrl = baseUrl; r.apiKey = apiKey; r.model = model; r.thinkingEnabled = thinkingEnabled; r.switchOnQuota = true; r.switchOnError = true; r.cooldownSeconds = 300; return r; } private String tag() { String showName = name == null ? "unnamed" : name; String showModel = model == null ? "" : (" model=" + model); return showName + showModel; } } } src/main/java/com/zy/ai/service/LlmRouteConfigService.java
New file @@ -0,0 +1,7 @@ package com.zy.ai.service; import com.baomidou.mybatisplus.service.IService; import com.zy.ai.entity.LlmRouteConfig; public interface LlmRouteConfigService extends IService<LlmRouteConfig> { } src/main/java/com/zy/ai/service/LlmRoutingService.java
New file @@ -0,0 +1,270 @@
package com.zy.ai.service;

import com.baomidou.mybatisplus.mapper.EntityWrapper;
import com.zy.ai.entity.LlmRouteConfig;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Mono;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Routing layer for LLM API endpoints stored in the sys_llm_route table.
 *
 * <p>Responsibilities visible in this class:
 * <ul>
 *   <li>Serve the route list through a short-lived (3s) in-memory cache.</li>
 *   <li>Filter routes by enabled/valid/cooling state for the caller to pick from.</li>
 *   <li>Record per-route success/failure statistics and cooldown windows.</li>
 *   <li>Normalize defaults on create/update and probe a route with a tiny
 *       "ping" chat completion request.</li>
 * </ul>
 *
 * <p>NOTE(review): {@link #markSuccess(Long)} evicts the cache on every successful
 * call, which effectively disables the 3-second cache under steady traffic —
 * confirm whether eviction on success is intentional.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class LlmRoutingService {

    // Cache lifetime for the full route list; keeps DB load low while letting
    // admin edits show up within a few seconds.
    private static final long CACHE_TTL_MS = 3000L;

    private final LlmRouteConfigService llmRouteConfigService;

    // Route-list cache; both fields are volatile so readers outside the lock in
    // loadAllRoutes() see a consistent publish (double-checked locking).
    private volatile List<LlmRouteConfig> allRouteCache = Collections.emptyList();
    private volatile long cacheExpireAt = 0L;

    /** Force the next {@link #loadAllRoutes()} call to reload from the database. */
    public void evictCache() {
        cacheExpireAt = 0L;
    }

    /**
     * Returns ALL routes (including disabled/cooling ones) ordered by priority then id.
     * A defensive copy is returned so callers cannot mutate the cache.
     */
    public List<LlmRouteConfig> listAllOrdered() {
        return new ArrayList<>(loadAllRoutes());
    }

    /**
     * Returns routes eligible for use right now: enabled, with baseUrl/apiKey/model
     * present, and not inside their cooldown window.
     *
     * <p>Degraded fallback: if every otherwise-valid route is cooling down, the
     * cooling routes are returned anyway so the system never becomes completely
     * unavailable just because of cooldowns.
     */
    public List<LlmRouteConfig> listAvailableRoutes() {
        Date now = new Date();
        List<LlmRouteConfig> result = new ArrayList<>();
        List<LlmRouteConfig> coolingRoutes = new ArrayList<>();
        // Counters only feed the diagnostic warn-log below.
        int total = 0;
        int disabled = 0;
        int invalid = 0;
        for (LlmRouteConfig c : loadAllRoutes()) {
            total++;
            if (!isEnabled(c)) { disabled++; continue; }
            if (isBlank(c.getBaseUrl()) || isBlank(c.getApiKey()) || isBlank(c.getModel())) { invalid++; continue; }
            if (isCooling(c, now)) { coolingRoutes.add(c); continue; }
            result.add(c);
        }
        if (result.isEmpty() && !coolingRoutes.isEmpty()) {
            // Degrade gracefully: when all routes are cooling, re-enable them rather
            // than leaving the system with zero usable routes.
            log.warn("LLM 路由均处于冷却,降级启用冷却路由。cooling={}, total={}", coolingRoutes.size(), total);
            return coolingRoutes;
        }
        if (result.isEmpty()) {
            log.warn("未找到可用 LLM 路由。total={}, disabled={}, invalid={}", total, disabled, invalid);
        }
        return result;
    }

    /**
     * Records a successful call on the given route: bumps successCount, resets the
     * consecutive-failure counter and stamps lastUsedTime.
     * Best-effort: DB errors are logged and swallowed so stats upkeep never breaks
     * the calling request.
     */
    public void markSuccess(Long routeId) {
        if (routeId == null) return;
        try {
            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
            if (db == null) return;
            db.setSuccessCount(nvl(db.getSuccessCount()) + 1);
            db.setConsecutiveFailCount(0);
            db.setLastUsedTime(new Date());
            db.setUpdateTime(new Date());
            llmRouteConfigService.updateById(db);
            evictCache();
        } catch (Exception e) {
            log.warn("更新路由成功状态失败, routeId={}", routeId, e);
        }
    }

    /**
     * Records a failed call on the given route and optionally starts a cooldown.
     *
     * @param routeId         route primary key; no-op when null
     * @param errorText       error summary, trimmed to 500 chars before storage
     * @param enterCooldown   when true, sets cooldownUntil = now + effective seconds
     * @param cooldownSeconds explicit cooldown override; falls back to the route's
     *                        own cooldownSeconds (or 300s) when null/non-positive
     */
    public void markFailure(Long routeId, String errorText, boolean enterCooldown, Integer cooldownSeconds) {
        if (routeId == null) return;
        try {
            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
            if (db == null) return;
            Date now = new Date();
            db.setFailCount(nvl(db.getFailCount()) + 1);
            db.setConsecutiveFailCount(nvl(db.getConsecutiveFailCount()) + 1);
            db.setLastFailTime(now);
            db.setLastError(trimError(errorText));
            if (enterCooldown) {
                int sec = cooldownSeconds != null && cooldownSeconds > 0 ? cooldownSeconds : defaultCooldown(db.getCooldownSeconds());
                db.setCooldownUntil(new Date(now.getTime() + sec * 1000L));
            }
            db.setUpdateTime(now);
            llmRouteConfigService.updateById(db);
            evictCache();
        } catch (Exception e) {
            log.warn("更新路由失败状态失败, routeId={}", routeId, e);
        }
    }

    // 300s fallback when the route has no (positive) configured cooldown.
    private int defaultCooldown(Integer sec) {
        return sec == null || sec <= 0 ? 300 : sec;
    }

    // Flattens newlines and caps the stored error summary at 500 chars
    // (matches the last_error VARCHAR(512) column).
    private String trimError(String err) {
        if (err == null) return null;
        String x = err.replace("\n", " ").replace("\r", " ");
        return x.length() > 500 ? x.substring(0, 500) : x;
    }

    // null-safe Integer → 0.
    private Integer nvl(Integer x) {
        return x == null ? 0 : x;
    }

    // Enabled means status == 1 (column comment: 1启用0禁用).
    private boolean isEnabled(LlmRouteConfig c) {
        return c != null && c.getStatus() != null && c.getStatus() == 1;
    }

    // A route is cooling while cooldownUntil lies in the future.
    private boolean isCooling(LlmRouteConfig c, Date now) {
        return c != null && c.getCooldownUntil() != null && c.getCooldownUntil().after(now);
    }

    /**
     * Loads all routes ordered by priority then id, through the TTL cache.
     * Classic double-checked locking: fast volatile read first, then re-check
     * under the lock before hitting the database.
     */
    private List<LlmRouteConfig> loadAllRoutes() {
        long now = System.currentTimeMillis();
        if (now < cacheExpireAt && allRouteCache != null) {
            return allRouteCache;
        }
        synchronized (this) {
            now = System.currentTimeMillis();
            if (now < cacheExpireAt && allRouteCache != null) {
                return allRouteCache;
            }
            EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
            wrapper.orderBy("priority", true).orderBy("id", true);
            List<LlmRouteConfig> list = llmRouteConfigService.selectList(wrapper);
            allRouteCache = list == null ? Collections.emptyList() : list;
            cacheExpireAt = System.currentTimeMillis() + CACHE_TTL_MS;
            return allRouteCache;
        }
    }

    // null-safe trim; returns "" for null.
    private String safe(String s) {
        return s == null ? "" : s.trim();
    }

    private boolean isBlank(String s) {
        return s == null || s.trim().isEmpty();
    }

    /**
     * Fills defaults on a route config before insert/update: auto-name, thinking=0,
     * priority=100, status=1, both switch flags on, cooldown 300s, zeroed counters,
     * and create/update timestamps.
     *
     * @param cfg      config to normalize in place (also returned for chaining)
     * @param isCreate when true, createTime is stamped as well
     */
    public LlmRouteConfig fillAndNormalize(LlmRouteConfig cfg, boolean isCreate) {
        Date now = new Date();
        if (isBlank(cfg.getName())) { cfg.setName("LLM_ROUTE_" + now.getTime()); }
        // NOTE(review): (short) casts suggest these entity fields are Short — confirm
        // against LlmRouteConfig, which is not visible here.
        if (cfg.getThinking() == null) { cfg.setThinking((short) 0); }
        if (cfg.getPriority() == null) { cfg.setPriority(100); }
        if (cfg.getStatus() == null) { cfg.setStatus((short) 1); }
        if (cfg.getSwitchOnQuota() == null) { cfg.setSwitchOnQuota((short) 1); }
        if (cfg.getSwitchOnError() == null) { cfg.setSwitchOnError((short) 1); }
        if (cfg.getCooldownSeconds() == null || cfg.getCooldownSeconds() < 0) { cfg.setCooldownSeconds(300); }
        if (cfg.getFailCount() == null) { cfg.setFailCount(0); }
        if (cfg.getSuccessCount() == null) { cfg.setSuccessCount(0); }
        if (cfg.getConsecutiveFailCount() == null) { cfg.setConsecutiveFailCount(0); }
        if (isCreate) { cfg.setCreateTime(now); }
        cfg.setUpdateTime(now);
        return cfg;
    }

    /**
     * Probes a route with a minimal chat request and returns a UI-friendly map:
     * ok (2xx), statusCode (-1 on transport error/exception), latencyMs, message,
     * responseSnippet (body trimmed to 300 chars). Never throws.
     */
    public Map<String, Object> testRoute(LlmRouteConfig cfg) {
        HashMap<String, Object> result = new HashMap<>();
        long start = System.currentTimeMillis();
        try {
            TestHttpResult raw = testJavaRoute(cfg);
            fillTestResult(result, raw, start);
        } catch (Exception e) {
            result.put("ok", false);
            result.put("statusCode", -1);
            result.put("latencyMs", System.currentTimeMillis() - start);
            result.put("message", "测试异常: " + safe(e.getMessage()));
            result.put("responseSnippet", "");
        }
        return result;
    }

    // Maps a raw HTTP result into the testRoute() response map; ok == 2xx status.
    private void fillTestResult(HashMap<String, Object> result, TestHttpResult raw, long start) {
        boolean ok = raw.statusCode >= 200 && raw.statusCode < 300;
        result.put("ok", ok);
        result.put("statusCode", raw.statusCode);
        result.put("latencyMs", System.currentTimeMillis() - start);
        result.put("message", ok ? "测试成功" : "测试失败");
        result.put("responseSnippet", trimBody(raw.body));
    }

    /**
     * Sends one blocking OpenAI-style POST {baseUrl}/chat/completions "ping"
     * (stream=false, max_tokens=8, temperature=0) with a 12s timeout.
     * Transport errors are converted to status -1 with the exception message as body.
     *
     * <p>NOTE(review): baseUrl is used verbatim — a trailing '/' would produce
     * "//chat/completions"; consider normalizing upstream.
     */
    private TestHttpResult testJavaRoute(LlmRouteConfig cfg) {
        HashMap<String, Object> req = new HashMap<>();
        req.put("model", cfg.getModel());
        List<Map<String, String>> messages = new ArrayList<>();
        HashMap<String, String> msg = new HashMap<>();
        msg.put("role", "user");
        msg.put("content", "ping");
        messages.add(msg);
        req.put("messages", messages);
        req.put("stream", false);
        req.put("max_tokens", 8);
        req.put("temperature", 0);
        WebClient client = WebClient.builder().baseUrl(cfg.getBaseUrl()).build();
        return client.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + cfg.getApiKey())
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .exchangeToMono(resp -> resp.bodyToMono(String.class)
                        .defaultIfEmpty("")
                        .map(body -> new TestHttpResult(resp.rawStatusCode(), body)))
                .timeout(Duration.ofSeconds(12))
                .onErrorResume(ex -> Mono.just(new TestHttpResult(-1, safe(ex.getMessage()))))
                .block();
    }

    // Flattens newlines and caps the snippet shown to the UI at 300 chars.
    private String trimBody(String body) {
        String x = safe(body).replace("\r", " ").replace("\n", " ");
        return x.length() > 300 ? x.substring(0, 300) : x;
    }

    /** Immutable (status, body) pair produced by the probe request. */
    private static class TestHttpResult {
        private final int statusCode;
        private final String body;

        private TestHttpResult(int statusCode, String body) {
            this.statusCode = statusCode;
            this.body = body;
        }
    }
}
src/main/java/com/zy/ai/service/PythonService.java
File was deleted src/main/java/com/zy/ai/service/WcsDiagnosisService.java
@@ -11,7 +11,6 @@ import com.zy.common.utils.RedisUtil; import com.zy.core.enums.RedisKeyType; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -28,8 +27,6 @@ private static final long CHAT_TTL_SECONDS = 7L * 24 * 3600; @Value("${llm.platform}") private String platform; @Autowired private LlmChatService llmChatService; @Autowired @@ -40,8 +37,6 @@ private AiUtils aiUtils; @Autowired(required = false) private McpController mcpController; @Autowired private PythonService pythonService; public void diagnoseStream(WcsDiagnosisRequest request, SseEmitter emitter) { List<ChatCompletionRequest.Message> messages = new ArrayList<>(); @@ -85,7 +80,7 @@ try { try { emitter.send(SseEmitter.event().data("【AI】运行已停止(异常)")); } catch (Exception ignore) {} log.error("AI diagnose stream stopped: error", e); emitter.completeWithError(e); emitter.complete(); } catch (Exception ignore) {} }); } @@ -95,11 +90,6 @@ String chatId, boolean reset, SseEmitter emitter) { if (platform.equals("python")) { pythonService.runPython(prompt, chatId, emitter); return; } List<ChatCompletionRequest.Message> messages = new ArrayList<>(); List<ChatCompletionRequest.Message> history = null; @@ -187,7 +177,10 @@ emitter.complete(); } catch (Exception ignore) {} }, e -> { try { emitter.completeWithError(e); } catch (Exception ignore) {} try { try { emitter.send(SseEmitter.event().data("【AI】运行已停止(异常)")); } catch (Exception ignore) {} emitter.complete(); } catch (Exception ignore) {} }); } @@ -380,7 +373,7 @@ try { sse(emitter, "\\n\\n【AI】运行已停止(异常)\\n\\n"); log.error("AI MCP diagnose stopped: error", e); emitter.completeWithError(e); emitter.complete(); } catch (Exception ignore) {} return true; } src/main/java/com/zy/ai/service/impl/LlmRouteConfigServiceImpl.java
New file @@ -0,0 +1,11 @@
package com.zy.ai.service.impl;

import com.baomidou.mybatisplus.service.impl.ServiceImpl;
import com.zy.ai.entity.LlmRouteConfig;
import com.zy.ai.mapper.LlmRouteConfigMapper;
import com.zy.ai.service.LlmRouteConfigService;
import org.springframework.stereotype.Service;

/**
 * MyBatis-Plus CRUD service for {@link LlmRouteConfig} (table sys_llm_route).
 * All behavior is inherited from {@link ServiceImpl}; no custom logic here.
 */
@Service("llmRouteConfigService")
public class LlmRouteConfigServiceImpl extends ServiceImpl<LlmRouteConfigMapper, LlmRouteConfig> implements LlmRouteConfigService {
}
src/main/resources/application.yml
@@ -1,6 +1,6 @@ # 系统版本信息 app: version: 1.0.4.2 version: 1.0.4.3 version-type: dev # prd 或 dev server: @@ -83,27 +83,15 @@ expireDays: 7 llm: platform: java pythonPlatformUrl: http://127.0.0.1:9000/ai/diagnose/askStream thinking: enable # base-url: https://api.siliconflow.cn/v1 # api-key: sk-sxdtebtquwrugzrmaqqqkzdzmrgzhzmplwwuowysdasccent # model: deepseek-ai/DeepSeek-V3.2 # base-url: http://47.76.147.249:9998/e/7g7kqxxt1ei2un71 # api-key: app-mP0O6aY5WpbfaHs7BNnjVkli # model: deepseek-ai/DeepSeek-V3.2 # base-url: http://34.2.134.223:3000/v1 # api-key: sk-WabrmtOezCFwVo7XvVOrO3QkmfcKG7T7jy0BaVnmQTWm5GXh # model: gemini-3-pro-preview # base-url: http://127.0.0.1:8317/v1 # api-key: WznOjAGJNVFKSe9kBZTr # model: gpt-5 base-url: https://api.xiaomimimo.com/v1 api-key: sk-cw7e4se9cal8cxdgjml8dmtn4pdmqtvfccg5fcermt0ddtys model: mimo-v2-flash # 现已迁移到数据库表 sys_llm_route 维护(支持多API/多模型/多Key自动切换) # 以下仅作为数据库为空时的兼容回退配置 thinking: false base-url: api-key: model: perf: methodTiming: enabled: false thresholdMs: 50 sampleRate: 1.0 sampleRate: 1.0 src/main/resources/mapper/LlmRouteConfigMapper.xml
New file @@ -0,0 +1,29 @@ <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd"> <mapper namespace="com.zy.ai.mapper.LlmRouteConfigMapper"> <resultMap id="BaseResultMap" type="com.zy.ai.entity.LlmRouteConfig"> <id column="id" property="id"/> <result column="name" property="name"/> <result column="base_url" property="baseUrl"/> <result column="api_key" property="apiKey"/> <result column="model" property="model"/> <result column="thinking" property="thinking"/> <result column="priority" property="priority"/> <result column="status" property="status"/> <result column="switch_on_quota" property="switchOnQuota"/> <result column="switch_on_error" property="switchOnError"/> <result column="cooldown_seconds" property="cooldownSeconds"/> <result column="cooldown_until" property="cooldownUntil"/> <result column="fail_count" property="failCount"/> <result column="success_count" property="successCount"/> <result column="consecutive_fail_count" property="consecutiveFailCount"/> <result column="last_error" property="lastError"/> <result column="last_used_time" property="lastUsedTime"/> <result column="last_fail_time" property="lastFailTime"/> <result column="create_time" property="createTime"/> <result column="update_time" property="updateTime"/> <result column="memo" property="memo"/> </resultMap> </mapper> src/main/resources/sql/20260303_add_ai_config_menu.sql
New file @@ -0,0 +1,46 @@ -- 将 AI配置 菜单挂载到:开发专用 -> AI配置 -- 说明:本系统菜单来源于 sys_resource,执行本脚本后请在“角色授权”里给对应角色勾选新菜单。 -- 1) 定位“开发专用”一级菜单 SET @dev_parent_id := ( SELECT id FROM sys_resource WHERE name = '开发专用' AND level = 1 ORDER BY id LIMIT 1 ); -- 2) 新增二级菜单:AI配置(页面) INSERT INTO sys_resource(code, name, resource_id, level, sort, status) SELECT 'ai/llm_config.html', 'AI配置', @dev_parent_id, 2, 999, 1 FROM dual WHERE @dev_parent_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM sys_resource WHERE code = 'ai/llm_config.html' AND level = 2 ); -- 3) 新增三级按钮权限:查看(用于角色细粒度授权) SET @ai_cfg_id := ( SELECT id FROM sys_resource WHERE code = 'ai/llm_config.html' AND level = 2 ORDER BY id LIMIT 1 ); INSERT INTO sys_resource(code, name, resource_id, level, sort, status) SELECT 'ai/llm_config.html#view', '查看', @ai_cfg_id, 3, 1, 1 FROM dual WHERE @ai_cfg_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM sys_resource WHERE code = 'ai/llm_config.html#view' AND level = 3 ); -- 可选检查 SELECT id, code, name, resource_id, level, sort, status FROM sys_resource WHERE code IN ('ai/llm_config.html', 'ai/llm_config.html#view'); src/main/resources/sql/20260303_create_sys_llm_route.sql
New file @@ -0,0 +1,32 @@ CREATE TABLE IF NOT EXISTS `sys_llm_route` ( `id` BIGINT NOT NULL AUTO_INCREMENT COMMENT '主键', `name` VARCHAR(64) NOT NULL COMMENT '路由名称', `base_url` VARCHAR(255) DEFAULT NULL COMMENT 'LLM API Base URL', `api_key` VARCHAR(512) DEFAULT NULL COMMENT 'API Key', `model` VARCHAR(128) DEFAULT NULL COMMENT '模型名', `thinking` TINYINT NOT NULL DEFAULT 0 COMMENT '是否开启深度思考:1是0否', `priority` INT NOT NULL DEFAULT 100 COMMENT '优先级(越小越优先)', `status` TINYINT NOT NULL DEFAULT 1 COMMENT '状态:1启用0禁用', `switch_on_quota` TINYINT NOT NULL DEFAULT 1 COMMENT '额度耗尽时是否切换', `switch_on_error` TINYINT NOT NULL DEFAULT 1 COMMENT '故障时是否切换', `cooldown_seconds` INT NOT NULL DEFAULT 300 COMMENT '故障后冷却秒数', `cooldown_until` DATETIME DEFAULT NULL COMMENT '冷却截止时间', `fail_count` INT NOT NULL DEFAULT 0 COMMENT '总失败次数', `success_count` INT NOT NULL DEFAULT 0 COMMENT '总成功次数', `consecutive_fail_count` INT NOT NULL DEFAULT 0 COMMENT '连续失败次数', `last_error` VARCHAR(512) DEFAULT NULL COMMENT '最近错误摘要', `last_used_time` DATETIME DEFAULT NULL COMMENT '最近成功调用时间', `last_fail_time` DATETIME DEFAULT NULL COMMENT '最近失败时间', `create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', `update_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', `memo` VARCHAR(255) DEFAULT NULL COMMENT '备注', PRIMARY KEY (`id`), KEY `idx_sys_llm_route_status_priority` (`status`, `priority`), KEY `idx_sys_llm_route_cooldown` (`cooldown_until`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='LLM 路由配置(多API/多模型/多Key自动切换)'; -- 示例数据(按需修改后执行) -- INSERT INTO sys_llm_route(name, base_url, api_key, model, thinking, priority, status, switch_on_quota, switch_on_error, cooldown_seconds) -- VALUES ('主路由-gpt5', 'https://api.xiaomimimo.com/v1', 'sk-xxxx', 'gpt-5', 1, 10, 1, 1, 1, 300); -- INSERT INTO sys_llm_route(name, base_url, api_key, model, thinking, priority, status, switch_on_quota, switch_on_error, cooldown_seconds) -- VALUES ('备路由-mimo', 
'https://api.xiaomimimo.com/v1', 'sk-yyyy', 'mimo-v2-flash', 0, 20, 1, 1, 1, 300); src/main/webapp/views/ai/llm_config.html
New file @@ -0,0 +1,421 @@ <!DOCTYPE html> <html lang="zh-CN"> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <title>AI配置</title> <link rel="stylesheet" href="../../static/vue/element/element.css" /> <style> body { margin: 0; background: radial-gradient(1200px 500px at 10% -10%, rgba(26, 115, 232, 0.14), transparent 50%), radial-gradient(900px 450px at 100% 0%, rgba(38, 166, 154, 0.11), transparent 55%), #f4f7fb; } .container { max-width: 1640px; margin: 16px auto; padding: 0 14px; } .hero { background: linear-gradient(135deg, #0f4c81 0%, #1f6fb2 45%, #2aa198 100%); color: #fff; border-radius: 14px; padding: 14px 16px; margin-bottom: 10px; box-shadow: 0 10px 28px rgba(23, 70, 110, 0.22); } .hero-top { display: flex; align-items: center; justify-content: space-between; gap: 10px; } .hero-title { display: flex; align-items: center; gap: 10px; } .hero-title .main { font-size: 16px; font-weight: 700; letter-spacing: 0.2px; } .hero-title .sub { font-size: 12px; opacity: 0.9; } .summary-grid { margin-top: 10px; display: grid; grid-template-columns: repeat(5, minmax(0, 1fr)); gap: 8px; } .summary-card { border-radius: 10px; background: rgba(255, 255, 255, 0.16); border: 1px solid rgba(255, 255, 255, 0.24); padding: 8px 10px; min-height: 56px; backdrop-filter: blur(3px); } .summary-card .k { font-size: 11px; opacity: 0.88; } .summary-card .v { margin-top: 4px; font-size: 22px; font-weight: 700; line-height: 1.1; } .table-shell { border-radius: 12px; overflow: hidden; box-shadow: 0 6px 22px rgba(15, 28, 48, 0.08); border: 1px solid #e8edf5; background: #fff; } .mono { font-family: Menlo, Monaco, Consolas, "Liberation Mono", monospace; font-size: 12px; } @media (max-width: 1280px) { .summary-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); } } </style> </head> <body> <div id="app" class="container"> <div class="hero"> <div class="hero-top"> <div class="hero-title"> <div v-html="headerIcon" 
style="display:flex;"></div> <div> <div class="main">AI配置 - LLM路由</div> <div class="sub">支持多API、多模型、多Key,额度耗尽或故障自动切换</div> </div> </div> <div> <el-button type="primary" size="mini" @click="addRoute">新增路由</el-button> <el-button size="mini" @click="loadRoutes">刷新</el-button> </div> </div> <div class="summary-grid"> <div class="summary-card"> <div class="k">总路由</div> <div class="v">{{ summary.total }}</div> </div> <div class="summary-card"> <div class="k">启用</div> <div class="v">{{ summary.enabled }}</div> </div> <div class="summary-card"> <div class="k">故障切换开启</div> <div class="v">{{ summary.errorSwitch }}</div> </div> <div class="summary-card"> <div class="k">额度切换开启</div> <div class="v">{{ summary.quotaSwitch }}</div> </div> <div class="summary-card"> <div class="k">冷却中</div> <div class="v">{{ summary.cooling }}</div> </div> </div> </div> <div class="table-shell"> <el-table :data="routes" stripe height="72vh" v-loading="loading" :header-cell-style="{background:'#f7f9fc', color:'#2e3a4d', fontWeight:600}"> <el-table-column label="名称" width="170"> <template slot-scope="scope"> <el-input v-model="scope.row.name" size="mini"></el-input> </template> </el-table-column> <el-table-column label="Base URL" min-width="220"> <template slot-scope="scope"> <el-input v-model="scope.row.baseUrl" class="mono" size="mini" placeholder="必填,例如: https://api.deepseek.com"></el-input> </template> </el-table-column> <el-table-column label="模型" width="180"> <template slot-scope="scope"> <el-input v-model="scope.row.model" class="mono" size="mini" placeholder="必填,例如: deepseek-chat"></el-input> </template> </el-table-column> <el-table-column label="API Key" min-width="220"> <template slot-scope="scope"> <el-input v-model="scope.row.apiKey" class="mono" type="password" size="mini" placeholder="必填"></el-input> </template> </el-table-column> <el-table-column label="优先级" width="90"> <template slot-scope="scope"> <el-input-number v-model="scope.row.priority" size="mini" :min="0" :max="99999" 
:controls="false" style="width:80px;"></el-input-number> </template> </el-table-column> <el-table-column label="状态" width="70"> <template slot-scope="scope"> <el-switch v-model="scope.row.status" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="思考" width="70"> <template slot-scope="scope"> <el-switch v-model="scope.row.thinking" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="额度切换" width="90"> <template slot-scope="scope"> <el-switch v-model="scope.row.switchOnQuota" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="故障切换" width="90"> <template slot-scope="scope"> <el-switch v-model="scope.row.switchOnError" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="冷却秒数" width="100"> <template slot-scope="scope"> <el-input-number v-model="scope.row.cooldownSeconds" size="mini" :min="0" :max="86400" :controls="false" style="width:90px;"></el-input-number> </template> </el-table-column> <el-table-column label="统计" min-width="220"> <template slot-scope="scope"> <div>成功: {{ scope.row.successCount || 0 }} / 失败: {{ scope.row.failCount || 0 }} / 连续失败: {{ scope.row.consecutiveFailCount || 0 }}</div> <div style="color:#909399;">冷却到: {{ scope.row.cooldownUntil || '-' }}</div> <div style="color:#909399;">最近错误: {{ scope.row.lastError || '-' }}</div> </template> </el-table-column> <el-table-column label="操作" width="120" fixed="right" align="center"> <template slot-scope="scope"> <el-dropdown trigger="click" @command="function(cmd){ handleRouteCommand(cmd, scope.row, scope.$index); }"> <el-button size="mini" type="primary" plain> 操作<i class="el-icon-arrow-down el-icon--right"></i> </el-button> <el-dropdown-menu slot="dropdown"> <el-dropdown-item command="test" :disabled="scope.row.__testing === true"> {{ scope.row.__testing === true ? '测试中...' 
: '测试' }} </el-dropdown-item> <el-dropdown-item command="save">保存</el-dropdown-item> <el-dropdown-item command="cooldown">清冷却</el-dropdown-item> <el-dropdown-item command="delete" divided>删除</el-dropdown-item> </el-dropdown-menu> </el-dropdown> </template> </el-table-column> </el-table> </div> </div> <script type="text/javascript" src="../../static/vue/js/vue.min.js"></script> <script type="text/javascript" src="../../static/vue/element/element.js"></script> <script type="text/javascript" src="../../static/js/common.js" charset="utf-8"></script> <script> new Vue({ el: '#app', data: function() { return { headerIcon: getAiIconHtml(34, 34), loading: false, routes: [] }; }, computed: { summary: function() { var now = Date.now(); var total = this.routes.length; var enabled = 0, quotaSwitch = 0, errorSwitch = 0, cooling = 0; for (var i = 0; i < this.routes.length; i++) { var x = this.routes[i]; if (x.status === 1) enabled++; if (x.switchOnQuota === 1) quotaSwitch++; if (x.switchOnError === 1) errorSwitch++; if (x.cooldownUntil && new Date(x.cooldownUntil).getTime() > now) cooling++; } return { total: total, enabled: enabled, quotaSwitch: quotaSwitch, errorSwitch: errorSwitch, cooling: cooling }; } }, methods: { authHeaders: function() { return { 'token': localStorage.getItem('token') }; }, handleRouteCommand: function(command, route, idx) { if (command === 'test') return this.testRoute(route); if (command === 'save') return this.saveRoute(route); if (command === 'cooldown') return this.clearCooldown(route); if (command === 'delete') return this.deleteRoute(route, idx); }, loadRoutes: function() { var self = this; self.loading = true; fetch(baseUrl + '/ai/llm/config/list/auth', { headers: self.authHeaders() }) .then(function(r){ return r.json(); }) .then(function(res){ self.loading = false; if (res && res.code === 200) { self.routes = Array.isArray(res.data) ? res.data : []; } else { self.$message.error((res && res.msg) ? 
res.msg : '加载失败'); } }) .catch(function(){ self.loading = false; self.$message.error('加载失败'); }); }, addRoute: function() { this.routes.unshift({ id: null, name: '', baseUrl: '', apiKey: '', model: '', thinking: 0, priority: 100, status: 1, switchOnQuota: 1, switchOnError: 1, cooldownSeconds: 300, successCount: 0, failCount: 0, consecutiveFailCount: 0, cooldownUntil: null, lastError: null }); }, buildPayload: function(route) { return { id: route.id, name: route.name, baseUrl: route.baseUrl, apiKey: route.apiKey, model: route.model, thinking: route.thinking, priority: route.priority, status: route.status, switchOnQuota: route.switchOnQuota, switchOnError: route.switchOnError, cooldownSeconds: route.cooldownSeconds, memo: route.memo }; }, saveRoute: function(route) { var self = this; fetch(baseUrl + '/ai/llm/config/save/auth', { method: 'POST', headers: Object.assign({ 'Content-Type': 'application/json' }, self.authHeaders()), body: JSON.stringify(self.buildPayload(route)) }) .then(function(r){ return r.json(); }) .then(function(res){ if (res && res.code === 200) { self.$message.success('保存成功'); self.loadRoutes(); } else { self.$message.error((res && res.msg) ? res.msg : '保存失败'); } }) .catch(function(){ self.$message.error('保存失败'); }); }, deleteRoute: function(route, idx) { var self = this; if (!route.id) { self.routes.splice(idx, 1); return; } self.$confirm('确定删除该路由吗?', '提示', { type: 'warning' }).then(function() { fetch(baseUrl + '/ai/llm/config/delete/auth?id=' + encodeURIComponent(route.id), { method: 'POST', headers: self.authHeaders() }) .then(function(r){ return r.json(); }) .then(function(res){ if (res && res.code === 200) { self.$message.success('删除成功'); self.loadRoutes(); } else { self.$message.error((res && res.msg) ? 
res.msg : '删除失败'); } }) .catch(function(){ self.$message.error('删除失败'); }); }).catch(function(){}); }, clearCooldown: function(route) { var self = this; if (!route.id) return; fetch(baseUrl + '/ai/llm/config/clearCooldown/auth?id=' + encodeURIComponent(route.id), { method: 'POST', headers: self.authHeaders() }) .then(function(r){ return r.json(); }) .then(function(res){ if (res && res.code === 200) { self.$message.success('已清除冷却'); self.loadRoutes(); } else { self.$message.error((res && res.msg) ? res.msg : '操作失败'); } }) .catch(function(){ self.$message.error('操作失败'); }); }, testRoute: function(route) { var self = this; if (route.__testing === true) return; if (!route.id) { self.$message.warning('当前是未保存配置,测试通过后仍需先保存才会生效'); } self.$set(route, '__testing', true); fetch(baseUrl + '/ai/llm/config/test/auth', { method: 'POST', headers: Object.assign({ 'Content-Type': 'application/json' }, self.authHeaders()), body: JSON.stringify(self.buildPayload(route)) }) .then(function(r){ return r.json(); }) .then(function(res){ if (!res || res.code !== 200) { self.$message.error((res && res.msg) ? res.msg : '测试失败'); return; } var data = res.data || {}; var ok = data.ok === true; var title = ok ? '测试成功' : '测试失败'; var msg = '' + '路由: ' + (route.name || '-') + '\n' + 'Base URL: ' + (route.baseUrl || '-') + '\n' + '状态码: ' + (data.statusCode != null ? data.statusCode : '-') + '\n' + '耗时: ' + (data.latencyMs != null ? data.latencyMs : '-') + ' ms\n' + '结果: ' + (data.message || '-') + '\n' + '返回片段: ' + (data.responseSnippet || '-'); self.$alert(msg, title, { confirmButtonText: '确定', type: ok ? 'success' : 'error' }); }) .catch(function(){ self.$message.error('测试失败'); }) .finally(function(){ self.$set(route, '__testing', false); }); } }, mounted: function() { this.loadRoutes(); } }); </script> </body> </html>