From 6d29b4cb573525c1092a67ef37aacf7ef2233723 Mon Sep 17 00:00:00 2001
From: Junjie <fallin.jie@qq.com>
Date: 星期二, 03 三月 2026 11:43:07 +0800
Subject: [PATCH] Add database-backed LLM route configuration with failover

---
 src/main/java/com/zy/ai/entity/LlmRouteConfig.java                  |  249 +++++++
 src/main/java/com/zy/ai/mapper/LlmRouteConfigMapper.java            |   11 
 src/main/resources/sql/20260303_add_ai_config_menu.sql              |   46 +
 src/main/resources/sql/20260303_create_sys_llm_route.sql            |   32 +
 src/main/java/com/zy/ai/controller/WcsDiagnosisController.java      |    4 
 src/main/java/com/zy/ai/service/impl/LlmRouteConfigServiceImpl.java |   11 
 src/main/webapp/views/ai/llm_config.html                            |  421 +++++++++++++
 /dev/null                                                           |   55 -
 src/main/java/com/zy/ai/service/LlmChatService.java                 |  566 +++++++++-------
 src/main/java/com/zy/ai/service/LlmRouteConfigService.java          |    7 
 src/main/java/com/zy/ai/service/LlmRoutingService.java              |  270 ++++++++
 src/main/java/com/zy/ai/controller/LlmRouteConfigController.java    |  131 ++++
 src/main/java/com/zy/ai/service/WcsDiagnosisService.java            |   19 
 src/main/resources/mapper/LlmRouteConfigMapper.xml                  |   29 
 src/main/resources/application.yml                                  |   28 
 15 files changed, 1,538 insertions(+), 341 deletions(-)

diff --git a/src/main/java/com/zy/ai/config/LlmConfig.java b/src/main/java/com/zy/ai/config/LlmConfig.java
deleted file mode 100644
index 385286d..0000000
--- a/src/main/java/com/zy/ai/config/LlmConfig.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.zy.ai.config;
-
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-import org.springframework.web.reactive.function.client.WebClient;
-
-@Configuration
-public class LlmConfig {
-
-    @Value("${llm.base-url}")
-    private String baseUrl;
-
-    @Bean
-    public WebClient llmWebClient() {
-        return WebClient.builder()
-                .baseUrl(baseUrl)
-                .build();
-    }
-}
\ No newline at end of file
diff --git a/src/main/java/com/zy/ai/controller/LlmRouteConfigController.java b/src/main/java/com/zy/ai/controller/LlmRouteConfigController.java
new file mode 100644
index 0000000..f43f408
--- /dev/null
+++ b/src/main/java/com/zy/ai/controller/LlmRouteConfigController.java
@@ -0,0 +1,131 @@
+package com.zy.ai.controller;
+
+import com.baomidou.mybatisplus.mapper.EntityWrapper;
+import com.core.annotations.ManagerAuth;
+import com.core.common.R;
+import com.zy.ai.entity.LlmRouteConfig;
+import com.zy.ai.service.LlmRouteConfigService;
+import com.zy.ai.service.LlmRoutingService;
+import com.zy.common.web.BaseController;
+import lombok.RequiredArgsConstructor;
+import org.springframework.web.bind.annotation.*;
+
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+
+@RestController
+@RequestMapping("/ai/llm/config")
+@RequiredArgsConstructor
+public class LlmRouteConfigController extends BaseController {
+    // Admin endpoints for sys_llm_route rows; each DB write below is followed by llmRoutingService.evictCache().
+    private final LlmRouteConfigService llmRouteConfigService;
+    private final LlmRoutingService llmRoutingService;
+    /** Lists every route config, ordered by priority and then id (both ascending). */
+    @GetMapping("/list/auth")
+    @ManagerAuth
+    public R list() {
+        EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
+        wrapper.orderBy("priority", true).orderBy("id", true);
+        List<LlmRouteConfig> list = llmRouteConfigService.selectList(wrapper);
+        return R.ok(list); // NOTE(review): rows are returned with apiKey populated - confirm clients should see it
+    }
+    /** Creates the route when id is null; otherwise updates it, keeping the stored statistics and createTime. */
+    @PostMapping("/save/auth")
+    @ManagerAuth
+    public R save(@RequestBody LlmRouteConfig config) {
+        if (config == null) {
+            return R.error("参数不能为空");
+        }
+
+        if (isBlank(config.getBaseUrl()) || isBlank(config.getApiKey()) || isBlank(config.getModel())) {
+            return R.error("必须填写 baseUrl/apiKey/model");
+        }
+
+        if (config.getId() == null) {
+            llmRoutingService.fillAndNormalize(config, true);
+            llmRouteConfigService.insert(config);
+        } else {
+            LlmRouteConfig db = llmRouteConfigService.selectById(config.getId());
+            if (db == null) {
+                return R.error("配置不存在");
+            }
+            // Keep the statistics fields from the stored row so the front end cannot overwrite them by mistake.
+            Integer failCount = db.getFailCount();
+            Integer successCount = db.getSuccessCount();
+            Integer consecutiveFailCount = db.getConsecutiveFailCount();
+            Date lastFailTime = db.getLastFailTime();
+            Date lastUsedTime = db.getLastUsedTime();
+            String lastError = db.getLastError();
+
+            llmRoutingService.fillAndNormalize(config, false);
+            config.setFailCount(failCount);
+            config.setSuccessCount(successCount);
+            config.setConsecutiveFailCount(consecutiveFailCount);
+            config.setLastFailTime(lastFailTime);
+            config.setLastUsedTime(lastUsedTime);
+            config.setLastError(lastError);
+            config.setCreateTime(db.getCreateTime());
+            llmRouteConfigService.updateById(config);
+        }
+
+        llmRoutingService.evictCache();
+        return R.ok(config);
+    }
+    /** Deletes the route with the given id and evicts the routing cache. */
+    @PostMapping("/delete/auth")
+    @ManagerAuth
+    public R delete(@RequestParam("id") Long id) {
+        if (id == null) {
+            return R.error("id不能为空");
+        }
+        llmRouteConfigService.deleteById(id);
+        llmRoutingService.evictCache();
+        return R.ok();
+    }
+    /** Clears the cooldown deadline and the consecutive-failure counter of one route. */
+    @PostMapping("/clearCooldown/auth")
+    @ManagerAuth
+    public R clearCooldown(@RequestParam("id") Long id) {
+        if (id == null) {
+            return R.error("id不能为空");
+        }
+        LlmRouteConfig cfg = llmRouteConfigService.selectById(id);
+        if (cfg == null) {
+            return R.error("配置不存在");
+        }
+        cfg.setCooldownUntil(null);
+        cfg.setConsecutiveFailCount(0);
+        cfg.setUpdateTime(new Date());
+        llmRouteConfigService.updateAllColumnById(cfg); // not updateById: MyBatis-Plus 2.x skips null fields by default, so cooldown_until would stay set
+        llmRoutingService.evictCache();
+        return R.ok();
+    }
+    /** Probes the submitted route via testRoute; when it reports ok for a saved route, also clears that route's cooldown state. */
+    @PostMapping("/test/auth")
+    @ManagerAuth
+    public R test(@RequestBody LlmRouteConfig config) {
+        if (config == null) {
+            return R.error("参数不能为空");
+        }
+        if (isBlank(config.getBaseUrl()) || isBlank(config.getApiKey()) || isBlank(config.getModel())) {
+            return R.error("测试失败：必须填写 baseUrl/apiKey/model");
+        }
+        Map<String, Object> data = llmRoutingService.testRoute(config);
+        if (Boolean.TRUE.equals(data.get("ok")) && config.getId() != null) {
+            LlmRouteConfig db = llmRouteConfigService.selectById(config.getId());
+            if (db != null) {
+                db.setCooldownUntil(null);
+                db.setConsecutiveFailCount(0);
+                db.setUpdateTime(new Date());
+                llmRouteConfigService.updateAllColumnById(db); // not updateById: the null cooldownUntil must actually be written
+                llmRoutingService.evictCache();
+            }
+        }
+        return R.ok(data);
+    }
+    /** Returns true when s is null or is empty after String.trim(). */
+    private boolean isBlank(String s) {
+        return s == null || s.trim().isEmpty();
+    }
+}
diff --git a/src/main/java/com/zy/ai/controller/WcsDiagnosisController.java b/src/main/java/com/zy/ai/controller/WcsDiagnosisController.java
index 15857a0..127c9ed 100644
--- a/src/main/java/com/zy/ai/controller/WcsDiagnosisController.java
+++ b/src/main/java/com/zy/ai/controller/WcsDiagnosisController.java
@@ -33,7 +33,7 @@
                 WcsDiagnosisRequest request = aiUtils.makeAiRequest(1000, "瀵瑰綋鍓嶇郴缁熻繘琛屽贰妫�锛屽鏋滄湁寮傚父鎯呭喌灏辫繘琛岃缁嗙殑鍒嗘瀽锛屽鏋滄病鏈夊紓甯告儏鍐靛垯褰撴垚涓�娆℃鏌n\n");
                 wcsDiagnosisService.diagnoseStream(request, emitter);
             } catch (Exception e) {
-                emitter.completeWithError(e);
+                try { emitter.complete(); } catch (Exception ignore) {}
             }
         }).start();
 
@@ -50,7 +50,7 @@
                 WcsDiagnosisRequest request = aiUtils.makeAiRequest(100, null);
                 wcsDiagnosisService.askStream(request, prompt, chatId, reset, emitter);
             } catch (Exception e) {
-                emitter.completeWithError(e);
+                try { emitter.complete(); } catch (Exception ignore) {}
             }
         }).start();
         return emitter;
diff --git a/src/main/java/com/zy/ai/entity/LlmRouteConfig.java b/src/main/java/com/zy/ai/entity/LlmRouteConfig.java
new file mode 100644
index 0000000..418ccb9
--- /dev/null
+++ b/src/main/java/com/zy/ai/entity/LlmRouteConfig.java
@@ -0,0 +1,249 @@
+package com.zy.ai.entity;
+
+import com.baomidou.mybatisplus.annotations.TableField;
+import com.baomidou.mybatisplus.annotations.TableId;
+import com.baomidou.mybatisplus.annotations.TableName;
+import com.baomidou.mybatisplus.enums.IdType;
+
+import java.io.Serializable;
+import java.util.Date;
+
+@TableName("sys_llm_route")
+public class LlmRouteConfig implements Serializable {
+    // One row of table sys_llm_route: endpoint settings (base URL, API key, model) plus failure/success bookkeeping.
+    private static final long serialVersionUID = 1L;
+
+    @TableId(value = "id", type = IdType.AUTO)
+    private Long id;
+
+    private String name;
+
+    @TableField("base_url")
+    private String baseUrl;
+
+    @TableField("api_key")
+    private String apiKey;
+
+    private String model;
+
+    /**
+     * 1 = deep thinking enabled, 0 = disabled
+     */
+    private Short thinking;
+
+    /**
+     * A smaller number means a higher priority
+     */
+    private Integer priority;
+
+    /**
+     * 1 = enabled, 0 = disabled
+     */
+    private Short status;
+
+    @TableField("switch_on_quota")
+    private Short switchOnQuota; // NOTE(review): presumably a 1/0 flag for switching routes on quota exhaustion - confirm
+
+    @TableField("switch_on_error")
+    private Short switchOnError; // NOTE(review): presumably a 1/0 flag for switching routes on other errors - confirm
+
+    @TableField("cooldown_seconds")
+    private Integer cooldownSeconds;
+
+    @TableField("cooldown_until")
+    private Date cooldownUntil; // null means no cooldown is recorded for this route
+
+    @TableField("fail_count")
+    private Integer failCount;
+
+    @TableField("success_count")
+    private Integer successCount;
+
+    @TableField("consecutive_fail_count")
+    private Integer consecutiveFailCount;
+
+    @TableField("last_error")
+    private String lastError;
+
+    @TableField("last_used_time")
+    private Date lastUsedTime;
+
+    @TableField("last_fail_time")
+    private Date lastFailTime;
+
+    @TableField("create_time")
+    private Date createTime;
+
+    @TableField("update_time")
+    private Date updateTime;
+
+    private String memo;
+
+    public Long getId() {
+        return id;
+    }
+
+    public void setId(Long id) {
+        this.id = id;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getBaseUrl() {
+        return baseUrl;
+    }
+
+    public void setBaseUrl(String baseUrl) {
+        this.baseUrl = baseUrl;
+    }
+
+    public String getApiKey() {
+        return apiKey;
+    }
+
+    public void setApiKey(String apiKey) {
+        this.apiKey = apiKey;
+    }
+
+    public String getModel() {
+        return model;
+    }
+
+    public void setModel(String model) {
+        this.model = model;
+    }
+
+    public Short getThinking() {
+        return thinking;
+    }
+
+    public void setThinking(Short thinking) {
+        this.thinking = thinking;
+    }
+
+    public Integer getPriority() {
+        return priority;
+    }
+
+    public void setPriority(Integer priority) {
+        this.priority = priority;
+    }
+
+    public Short getStatus() {
+        return status;
+    }
+
+    public void setStatus(Short status) {
+        this.status = status;
+    }
+
+    public Short getSwitchOnQuota() {
+        return switchOnQuota;
+    }
+
+    public void setSwitchOnQuota(Short switchOnQuota) {
+        this.switchOnQuota = switchOnQuota;
+    }
+
+    public Short getSwitchOnError() {
+        return switchOnError;
+    }
+
+    public void setSwitchOnError(Short switchOnError) {
+        this.switchOnError = switchOnError;
+    }
+
+    public Integer getCooldownSeconds() {
+        return cooldownSeconds;
+    }
+
+    public void setCooldownSeconds(Integer cooldownSeconds) {
+        this.cooldownSeconds = cooldownSeconds;
+    }
+
+    public Date getCooldownUntil() {
+        return cooldownUntil;
+    }
+
+    public void setCooldownUntil(Date cooldownUntil) {
+        this.cooldownUntil = cooldownUntil;
+    }
+
+    public Integer getFailCount() {
+        return failCount;
+    }
+
+    public void setFailCount(Integer failCount) {
+        this.failCount = failCount;
+    }
+
+    public Integer getSuccessCount() {
+        return successCount;
+    }
+
+    public void setSuccessCount(Integer successCount) {
+        this.successCount = successCount;
+    }
+
+    public Integer getConsecutiveFailCount() {
+        return consecutiveFailCount;
+    }
+
+    public void setConsecutiveFailCount(Integer consecutiveFailCount) {
+        this.consecutiveFailCount = consecutiveFailCount;
+    }
+
+    public String getLastError() {
+        return lastError;
+    }
+
+    public void setLastError(String lastError) {
+        this.lastError = lastError;
+    }
+
+    public Date getLastUsedTime() {
+        return lastUsedTime;
+    }
+
+    public void setLastUsedTime(Date lastUsedTime) {
+        this.lastUsedTime = lastUsedTime;
+    }
+
+    public Date getLastFailTime() {
+        return lastFailTime;
+    }
+
+    public void setLastFailTime(Date lastFailTime) {
+        this.lastFailTime = lastFailTime;
+    }
+
+    public Date getCreateTime() {
+        return createTime;
+    }
+
+    public void setCreateTime(Date createTime) {
+        this.createTime = createTime;
+    }
+
+    public Date getUpdateTime() {
+        return updateTime;
+    }
+
+    public void setUpdateTime(Date updateTime) {
+        this.updateTime = updateTime;
+    }
+
+    public String getMemo() {
+        return memo;
+    }
+
+    public void setMemo(String memo) {
+        this.memo = memo;
+    }
+}
diff --git a/src/main/java/com/zy/ai/mapper/LlmRouteConfigMapper.java b/src/main/java/com/zy/ai/mapper/LlmRouteConfigMapper.java
new file mode 100644
index 0000000..7397da0
--- /dev/null
+++ b/src/main/java/com/zy/ai/mapper/LlmRouteConfigMapper.java
@@ -0,0 +1,11 @@
+package com.zy.ai.mapper;
+
+import com.baomidou.mybatisplus.mapper.BaseMapper;
+import com.zy.ai.entity.LlmRouteConfig;
+import org.apache.ibatis.annotations.Mapper;
+import org.springframework.stereotype.Repository;
+
+@Mapper
+@Repository
+public interface LlmRouteConfigMapper extends BaseMapper<LlmRouteConfig> { // declares no methods of its own; everything comes from BaseMapper
+}
diff --git a/src/main/java/com/zy/ai/service/LlmChatService.java b/src/main/java/com/zy/ai/service/LlmChatService.java
index ddb333a..4e6bf19 100644
--- a/src/main/java/com/zy/ai/service/LlmChatService.java
+++ b/src/main/java/com/zy/ai/service/LlmChatService.java
@@ -1,7 +1,11 @@
 package com.zy.ai.service;
 
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
 import com.zy.ai.entity.ChatCompletionRequest;
 import com.zy.ai.entity.ChatCompletionResponse;
+import com.zy.ai.entity.LlmRouteConfig;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.beans.factory.annotation.Value;
@@ -9,37 +13,33 @@
 import org.springframework.http.MediaType;
 import org.springframework.stereotype.Service;
 import org.springframework.web.reactive.function.client.WebClient;
-import reactor.core.publisher.Mono;
 import reactor.core.publisher.Flux;
 
-import java.util.HashMap;
+import java.util.ArrayList;
 import java.util.List;
-import java.util.function.Consumer;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
-import com.alibaba.fastjson.JSON;
-import com.alibaba.fastjson.JSONArray;
-import com.alibaba.fastjson.JSONObject;
+import java.util.function.Consumer;
 
 @Slf4j
 @Service
 @RequiredArgsConstructor
 public class LlmChatService {
 
-    private final WebClient llmWebClient;
+    private final LlmRoutingService llmRoutingService;
 
-    @Value("${llm.api-key}")
-    private String apiKey;
+    @Value("${llm.base-url:}")
+    private String fallbackBaseUrl;
 
-    @Value("${llm.model}")
-    private String model;
+    @Value("${llm.api-key:}")
+    private String fallbackApiKey;
 
-    @Value("${llm.pythonPlatformUrl}")
-    private String pythonPlatformUrl;
+    @Value("${llm.model:}")
+    private String fallbackModel;
 
-    @Value("${llm.thinking}")
-    private String thinking;
+    @Value("${llm.thinking:false}")
+    private String fallbackThinking;
 
     /**
      * 閫氱敤瀵硅瘽鏂规硶锛氫紶鍏� messages锛岃繑鍥炲ぇ妯″瀷鏂囨湰鍥炲
@@ -49,27 +49,12 @@
                        Integer maxTokens) {
 
         ChatCompletionRequest req = new ChatCompletionRequest();
-        req.setModel(model);
         req.setMessages(messages);
         req.setTemperature(temperature != null ? temperature : 0.3);
         req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
         req.setStream(false);
 
-        ChatCompletionResponse response = llmWebClient.post()
-                .uri("/chat/completions")
-                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
-                .contentType(MediaType.APPLICATION_JSON)
-                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
-                .bodyValue(req)
-                .exchangeToMono(resp -> resp.bodyToFlux(String.class)
-                        .collectList()
-                        .map(list -> {
-                            String payload = String.join("\n\n", list);
-                            return parseCompletion(payload);
-                        }))
-                .doOnError(ex -> log.error("璋冪敤 LLM 澶辫触", ex))
-                .onErrorResume(ex -> Mono.empty())
-                .block();
+        ChatCompletionResponse response = complete(req);
 
         if (response == null ||
                 response.getChoices() == null ||
@@ -88,17 +73,10 @@
                                                  Integer maxTokens,
                                                  List<Object> tools) {
         ChatCompletionRequest req = new ChatCompletionRequest();
-        req.setModel(model);
         req.setMessages(messages);
         req.setTemperature(temperature != null ? temperature : 0.3);
         req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
         req.setStream(false);
-
-        if(thinking.equals("enable")) {
-            ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking();
-            thinking.setType("enable");
-            req.setThinking(thinking);
-        }
         if (tools != null && !tools.isEmpty()) {
             req.setTools(tools);
             req.setTool_choice("auto");
@@ -107,26 +85,42 @@
     }
 
     public ChatCompletionResponse complete(ChatCompletionRequest req) {
-        try {
-            return llmWebClient.post()
-                    .uri("/chat/completions")
-                    .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
-                    .contentType(MediaType.APPLICATION_JSON)
-                    .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
-                    .bodyValue(req)
-                    .exchangeToMono(resp -> resp.bodyToFlux(String.class)
-                            .collectList()
-                            .map(list -> {
-                                String payload = String.join("\n\n", list);
-                                return parseCompletion(payload);
-                            }))
-                    .doOnError(ex -> log.error("璋冪敤 LLM 澶辫触", ex))
-                    .onErrorResume(ex -> Mono.empty())
-                    .block();
-        } catch (Exception e) {
-            log.error("璋冪敤 LLM 澶辫触", e);
+        List<ResolvedRoute> routes = resolveRoutes();
+        if (routes.isEmpty()) {
+            log.error("璋冪敤 LLM 澶辫触: 鏈厤缃彲鐢� LLM 璺敱");
             return null;
         }
+        // Try the resolved routes in list order; move on to the next one only when shouldSwitch(...) allows it.
+        Throwable last = null;
+        for (int i = 0; i < routes.size(); i++) {
+            ResolvedRoute route = routes.get(i);
+            boolean hasNext = i < routes.size() - 1;
+            try {
+                ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false); // a copy is passed in, not the caller's req
+                ChatCompletionResponse resp = callCompletion(route, routeReq);
+                if (!isValidCompletion(resp)) { // a rejected response is thrown so it takes the same catch path as a call error
+                    throw new RuntimeException("LLM 鍝嶅簲涓虹┖");
+                }
+                markSuccess(route);
+                return resp;
+            } catch (Throwable ex) { // NOTE(review): this also catches Error subclasses - confirm that is intended
+                last = ex;
+                boolean quota = isQuotaExhausted(ex);
+                boolean canSwitch = shouldSwitch(route, quota);
+                markFailure(route, ex, canSwitch);
+                if (hasNext && canSwitch) {
+                    log.warn("LLM 鍒囨崲鍒颁笅涓�璺敱, current={}, reason={}", route.tag(), errorText(ex));
+                    continue;
+                }
+                log.error("璋冪敤 LLM 澶辫触, route={}", route.tag(), ex);
+                break;
+            }
+        }
+        // Reached only when no route returned a valid response; the caller gets null.
+        if (last != null) {
+            log.error("璋冪敤 LLM 鍏ㄩ儴璺敱澶辫触: {}", errorText(last));
+        }
+        return null;
     }
 
     public void chatStream(List<ChatCompletionRequest.Message> messages,
@@ -137,92 +131,12 @@
                            Consumer<Throwable> onError) {
 
         ChatCompletionRequest req = new ChatCompletionRequest();
-        req.setModel(model);
         req.setMessages(messages);
         req.setTemperature(temperature != null ? temperature : 0.3);
         req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
         req.setStream(true);
 
-
-        Flux<String> flux = llmWebClient.post()
-                .uri("/chat/completions")
-                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
-                .contentType(MediaType.APPLICATION_JSON)
-                .accept(MediaType.TEXT_EVENT_STREAM)
-                .bodyValue(req)
-                .retrieve()
-                .bodyToFlux(String.class)
-                .doOnError(ex -> log.error("璋冪敤 LLM 娴佸紡澶辫触", ex));
-
-        AtomicBoolean doneSeen = new AtomicBoolean(false);
-        AtomicBoolean errorSeen = new AtomicBoolean(false);
-        LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
-
-        Thread drain = new Thread(() -> {
-            try {
-                while (true) {
-                    String s = queue.poll(2, TimeUnit.SECONDS);
-                    if (s != null) {
-                        try { onChunk.accept(s); } catch (Exception ignore) {}
-                    }
-                    if (doneSeen.get() && queue.isEmpty()) {
-                        if (!errorSeen.get()) {
-                            try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {}
-                        }
-                        break;
-                    }
-                }
-            } catch (InterruptedException ignore) {
-                ignore.printStackTrace();
-            }
-        });
-        drain.setDaemon(true);
-        drain.start();
-
-        flux.subscribe(payload -> {
-            if (payload == null || payload.isEmpty()) return;
-            String[] events = payload.split("\\r?\\n\\r?\\n");
-            for (String part : events) {
-                String s = part;
-                if (s == null || s.isEmpty()) continue;
-                if (s.startsWith("data:")) {
-                    s = s.substring(5);
-                    if (s.startsWith(" ")) s = s.substring(1);
-                }
-                if ("[DONE]".equals(s.trim())) {
-                    doneSeen.set(true);
-                    continue;
-                }
-                try {
-                    JSONObject obj = JSON.parseObject(s);
-                    JSONArray choices = obj.getJSONArray("choices");
-                    if (choices != null && !choices.isEmpty()) {
-                        JSONObject c0 = choices.getJSONObject(0);
-                        JSONObject delta = c0.getJSONObject("delta");
-                        if (delta != null) {
-                            String content = delta.getString("content");
-                            if (content != null) {
-                                try { queue.offer(content); } catch (Exception ignore) {}
-                            }
-                        }
-                    }
-                } catch (Exception e) {
-                    e.printStackTrace();
-                }
-            }
-        }, err -> {
-            errorSeen.set(true);
-            doneSeen.set(true);
-            if (onError != null) onError.accept(err);
-        }, () -> {
-            if (!doneSeen.get()) {
-                errorSeen.set(true);
-                doneSeen.set(true);
-                if (onError != null) onError.accept(new RuntimeException("LLM 娴佹剰澶栧畬鎴�"));
-            } else {
-                doneSeen.set(true);
-            }
-        });
+        streamWithFailover(req, onChunk, onComplete, onError);
     }
 
     public void chatStreamWithTools(List<ChatCompletionRequest.Message> messages,
@@ -233,120 +147,46 @@
                                     Runnable onComplete,
                                     Consumer<Throwable> onError) {
         ChatCompletionRequest req = new ChatCompletionRequest();
-        req.setModel(model);
         req.setMessages(messages);
         req.setTemperature(temperature != null ? temperature : 0.3);
         req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
         req.setStream(true);
-        if(thinking.equals("enable")) {
-            ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking();
-            thinking.setType("enable");
-            req.setThinking(thinking);
-        }
         if (tools != null && !tools.isEmpty()) {
             req.setTools(tools);
             req.setTool_choice("auto");
         }
-        Flux<String> flux = llmWebClient.post()
-                .uri("/chat/completions")
-                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
-                .contentType(MediaType.APPLICATION_JSON)
-                .accept(MediaType.TEXT_EVENT_STREAM)
-                .bodyValue(req)
-                .retrieve()
-                .bodyToFlux(String.class)
-                .doOnError(ex -> log.error("璋冪敤 LLM 娴佸紡澶辫触", ex));
-
-        AtomicBoolean doneSeen = new AtomicBoolean(false);
-        AtomicBoolean errorSeen = new AtomicBoolean(false);
-        LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
-
-        Thread drain = new Thread(() -> {
-            try {
-                while (true) {
-                    String s = queue.poll(5, TimeUnit.SECONDS);
-                    if (s != null) {
-                        try { onChunk.accept(s); } catch (Exception ignore) {}
-                    }
-                    if (doneSeen.get() && queue.isEmpty()) {
-                        if (!errorSeen.get()) {
-                            try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {}
-                        }
-                        break;
-                    }
-                }
-            } catch (InterruptedException ignore) {
-                ignore.printStackTrace();
-            }
-        });
-        drain.setDaemon(true);
-        drain.start();
-
-        flux.subscribe(payload -> {
-            if (payload == null || payload.isEmpty()) return;
-            String[] events = payload.split("\\r?\\n\\r?\\n");
-            for (String part : events) {
-                String s = part;
-                if (s == null || s.isEmpty()) continue;
-                if (s.startsWith("data:")) {
-                    s = s.substring(5);
-                    if (s.startsWith(" ")) s = s.substring(1);
-                }
-                if ("[DONE]".equals(s.trim())) {
-                    doneSeen.set(true);
-                    continue;
-                }
-                try {
-                    JSONObject obj = JSON.parseObject(s);
-                    JSONArray choices = obj.getJSONArray("choices");
-                    if (choices != null && !choices.isEmpty()) {
-                        JSONObject c0 = choices.getJSONObject(0);
-                        JSONObject delta = c0.getJSONObject("delta");
-                        if (delta != null) {
-                            String content = delta.getString("content");
-                            if (content != null) {
-                                try { queue.offer(content); } catch (Exception ignore) {}
-                            }
-                        }
-                    }
-                } catch (Exception e) {
-                    e.printStackTrace();
-                }
-            }
-        }, err -> {
-            errorSeen.set(true);
-            doneSeen.set(true);
-            if (onError != null) onError.accept(err);
-        }, () -> {
-            if (!doneSeen.get()) {
-                errorSeen.set(true);
-                doneSeen.set(true);
-                if (onError != null) onError.accept(new RuntimeException("LLM 娴佹剰澶栧畬鎴�"));
-            } else {
-                doneSeen.set(true);
-            }
-        });
+        streamWithFailover(req, onChunk, onComplete, onError);
     }
 
-    public void chatStreamRunPython(String prompt, String chatId, Consumer<String> onChunk,
+    private void streamWithFailover(ChatCompletionRequest req, // streaming entry point shared by chatStream and chatStreamWithTools
+                                    Consumer<String> onChunk,
                                     Runnable onComplete,
                                     Consumer<Throwable> onError) {
-        HashMap<String, Object> req = new HashMap<>();
-        req.put("prompt", prompt);
-        req.put("chatId", chatId);
+        List<ResolvedRoute> routes = resolveRoutes();
+        if (routes.isEmpty()) {
+            if (onError != null) onError.accept(new RuntimeException("鏈厤缃彲鐢� LLM 璺敱"));
+            return; // no route to try: onError (when provided) has been notified and onComplete is not called
+        }
+        attemptStream(routes, 0, req, onChunk, onComplete, onError); // start with the first route (index 0)
+    }
 
-        Flux<String> flux = llmWebClient.post()
-                .uri(pythonPlatformUrl)
-                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
-                .contentType(MediaType.APPLICATION_JSON)
-                .accept(MediaType.TEXT_EVENT_STREAM)
-                .bodyValue(req)
-                .retrieve()
-                .bodyToFlux(String.class)
-                .doOnError(ex -> log.error("璋冪敤 LLM 娴佸紡澶辫触", ex));
+    private void attemptStream(List<ResolvedRoute> routes,
+                               int index,
+                               ChatCompletionRequest req,
+                               Consumer<String> onChunk,
+                               Runnable onComplete,
+                               Consumer<Throwable> onError) {
+        if (index >= routes.size()) { // every candidate route has already been tried
+            if (onError != null) onError.accept(new RuntimeException("LLM 路由全部失败"));
+            return;
+        }
+        // Each attempt works on its own copy of the request, with this route's settings applied for streaming.
+        ResolvedRoute route = routes.get(index);
+        ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, true);
 
         AtomicBoolean doneSeen = new AtomicBoolean(false);
         AtomicBoolean errorSeen = new AtomicBoolean(false);
+        AtomicBoolean emitted = new AtomicBoolean(false); // set right before a chunk is handed to onChunk; blocks failover afterwards
         LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
 
         Thread drain = new Thread(() -> {
@@ -354,6 +194,7 @@
                 while (true) {
                     String s = queue.poll(2, TimeUnit.SECONDS);
                     if (s != null) {
+                        emitted.set(true);
                         try {
                             onChunk.accept(s);
                         } catch (Exception ignore) {
@@ -370,13 +211,12 @@
                     }
                 }
             } catch (InterruptedException ignore) {
-                ignore.printStackTrace();
             }
         });
         drain.setDaemon(true);
         drain.start();
 
-        flux.subscribe(payload -> {
+        streamFlux(route, routeReq).subscribe(payload -> {
             if (payload == null || payload.isEmpty()) return;
             String[] events = payload.split("\\r?\\n\\r?\\n");
             for (String part : events) {
@@ -390,10 +230,6 @@
                     doneSeen.set(true);
                     continue;
                 }
-                if("<think>".equals(s.trim()) || "</think>".equals(s.trim())) {
-                    queue.offer(s.trim());
-                    continue;
-                }
                 try {
                     JSONObject obj = JSON.parseObject(s);
                     JSONArray choices = obj.getJSONArray("choices");
@@ -403,30 +239,190 @@
                         if (delta != null) {
                             String content = delta.getString("content");
                             if (content != null) {
-                                try {
-                                    queue.offer(content);
-                                } catch (Exception ignore) {
-                                }
+                                queue.offer(content);
                             }
                         }
                     }
                 } catch (Exception e) {
-                    e.printStackTrace();
+                    log.warn("解析 LLM stream 片段失败: {}", e.getMessage());
                 }
             }
         }, err -> {
             errorSeen.set(true);
             doneSeen.set(true);
+            boolean quota = isQuotaExhausted(err);
+            boolean canSwitch = shouldSwitch(route, quota);
+            markFailure(route, err, canSwitch);
+            if (!emitted.get() && canSwitch && index < routes.size() - 1) { // fail over only while nothing was delivered yet
+                log.warn("LLM 路由失败，自动切换，current={}, reason={}", route.tag(), errorText(err));
+                attemptStream(routes, index + 1, req, onChunk, onComplete, onError);
+                return;
+            }
             if (onError != null) onError.accept(err);
         }, () -> {
             if (!doneSeen.get()) {
+                RuntimeException ex = new RuntimeException("LLM 流意外完成");
                 errorSeen.set(true);
                 doneSeen.set(true);
-                if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成"));
+                boolean canSwitch = shouldSwitch(route, false); // a premature end is never classified as a quota error
+                markFailure(route, ex, canSwitch);
+                if (!emitted.get() && canSwitch && index < routes.size() - 1) {
+                    log.warn("LLM 路由流异常完成，自动切换，current={}", route.tag());
+                    attemptStream(routes, index + 1, req, onChunk, onComplete, onError);
+                } else {
+                    if (onError != null) onError.accept(ex);
+                }
             } else {
+                markSuccess(route);
                 doneSeen.set(true);
             }
         });
+    }
+
+    private Flux<String> streamFlux(ResolvedRoute route, ChatCompletionRequest req) {
+        WebClient client = WebClient.builder().baseUrl(route.baseUrl).build(); // a new client per call, bound to this route's base URL
+        return client.post()
+                .uri("/chat/completions")
+                .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey)
+                .contentType(MediaType.APPLICATION_JSON)
+                .accept(MediaType.TEXT_EVENT_STREAM)
+                .bodyValue(req)
+                .exchangeToFlux(resp -> {
+                    int status = resp.rawStatusCode();
+                    if (status >= 200 && status < 300) {
+                        return resp.bodyToFlux(String.class);
+                    }
+                    return resp.bodyToMono(String.class) // non-2xx: read the body so the raised error can carry it
+                            .defaultIfEmpty("")
+                            .flatMapMany(body -> Flux.error(new LlmRouteException(status, body)));
+                })
+                .doOnError(ex -> log.error("调用 LLM 流式失败, route={}", route.tag(), ex));
+    }
+
+    private ChatCompletionResponse callCompletion(ResolvedRoute route, ChatCompletionRequest req) {
+        WebClient client = WebClient.builder().baseUrl(route.baseUrl).build();
+        RawCompletionResult raw = client.post()
+                .uri("/chat/completions")
+                .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey)
+                .contentType(MediaType.APPLICATION_JSON)
+                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
+                .bodyValue(req)
+                .exchangeToMono(resp -> resp.bodyToFlux(String.class)
+                        .collectList()
+                        .map(list -> new RawCompletionResult(resp.rawStatusCode(), String.join("\n\n", list)))) // real line breaks, not the literal text \n\n
+                .block();
+        // A missing result or a non-2xx status is raised as an exception; only 2xx payloads are parsed.
+        if (raw == null) {
+            throw new RuntimeException("LLM 返回为空");
+        }
+        if (raw.statusCode < 200 || raw.statusCode >= 300) {
+            throw new LlmRouteException(raw.statusCode, raw.payload);
+        }
+        return parseCompletion(raw.payload);
+    }
+
+    private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) {
+        // Stamps the route's model, the stream flag and the thinking setting onto req and returns that same instance.
+        ChatCompletionRequest.Thinking thinking = null; // stays null when the route has thinking switched off
+        if (route.thinkingEnabled) {
+            thinking = new ChatCompletionRequest.Thinking();
+            thinking.setType("enable");
+        }
+        req.setModel(route.model);
+        req.setStream(stream);
+        req.setThinking(thinking);
+        return req;
+    }
+
+    private ChatCompletionRequest cloneRequest(ChatCompletionRequest src) {
+        ChatCompletionRequest copy = new ChatCompletionRequest(); // shallow copy: field values are shared with src
+        copy.setMessages(src.getMessages());
+        copy.setTools(src.getTools());
+        copy.setTool_choice(src.getTool_choice());
+        copy.setModel(src.getModel());
+        copy.setStream(src.getStream());
+        copy.setThinking(src.getThinking());
+        copy.setTemperature(src.getTemperature());
+        copy.setMax_tokens(src.getMax_tokens());
+        return copy;
+    }
+
+    private boolean isValidCompletion(ChatCompletionResponse response) {
+        // Valid means: the first choice has a message carrying either non-blank content or at least one tool call.
+        if (response == null) {
+            return false;
+        }
+        if (response.getChoices() == null || response.getChoices().isEmpty()) {
+            return false;
+        }
+        ChatCompletionRequest.Message first = response.getChoices().get(0).getMessage();
+        if (first == null) return false;
+        boolean hasText = !isBlank(first.getContent());
+        return hasText || (first.getTool_calls() != null && !first.getTool_calls().isEmpty());
+    }
+
+    private boolean shouldSwitch(ResolvedRoute route, boolean quota) {
+        return (quota && route.switchOnQuota) || (!quota && route.switchOnError); // quota failures follow switchOnQuota, all others switchOnError
+    }
+
+    private void markSuccess(ResolvedRoute route) {
+        // Only routes that carry an id are reported to the routing service.
+        Long routeId = route.id;
+        if (routeId != null) llmRoutingService.markSuccess(routeId);
+    }
+
+    private void markFailure(ResolvedRoute route, Throwable ex, boolean enterCooldown) {
+        // Nothing is recorded for a route without an id.
+        if (route.id == null) return;
+        llmRoutingService.markFailure(route.id, errorText(ex), enterCooldown, route.cooldownSeconds);
+    }
+
+    private String errorText(Throwable ex) {
+        // Short failure description; HTTP failures show the status plus at most 240 characters of the body.
+        if (ex == null) return "unknown";
+        if (!(ex instanceof LlmRouteException)) {
+            String msg = ex.getMessage();
+            return msg != null ? msg : ex.toString();
+        }
+        LlmRouteException http = (LlmRouteException) ex;
+        String snippet = http.body == null ? "" : http.body;
+        int keep = Math.min(snippet.length(), 240);
+        return "status=" + http.statusCode + ", body=" + snippet.substring(0, keep);
+    }
+
+    private boolean isQuotaExhausted(Throwable ex) {
+        if (!(ex instanceof LlmRouteException)) return false; // only HTTP-level failures are inspected
+        LlmRouteException e = (LlmRouteException) ex;
+        if (e.statusCode == 429) return true;
+        String text = (e.body == null ? "" : e.body).toLowerCase(java.util.Locale.ROOT); // locale-independent, so "RATE LIMIT" matches everywhere
+        return text.contains("insufficient_quota")
+                || text.contains("quota")
+                || text.contains("余额")
+                || text.contains("用量")
+                || text.contains("超限")
+                || text.contains("rate limit");
+    }
+
+    private List<ResolvedRoute> resolveRoutes() {
+        // Database routes are taken in the order listAvailableRoutes() returns them.
+        List<ResolvedRoute> resolved = new ArrayList<>();
+        llmRoutingService.listAvailableRoutes().forEach(cfg -> resolved.add(ResolvedRoute.fromDb(cfg)));
+        // Compatibility: with no database route, fall back to the yml settings when url, key and model are all set.
+        boolean fallbackConfigured = !isBlank(fallbackBaseUrl) && !isBlank(fallbackApiKey) && !isBlank(fallbackModel);
+        if (resolved.isEmpty() && fallbackConfigured) {
+            boolean thinking = isFallbackThinkingEnabled();
+            resolved.add(ResolvedRoute.fromFallback(fallbackBaseUrl, fallbackApiKey, fallbackModel, thinking));
+        }
+        return resolved;
+    }
+
+    private boolean isFallbackThinkingEnabled() {
+        if (fallbackThinking == null) return false; // an unset flag means thinking stays off
+        return java.util.Arrays.asList("true", "1", "enable").contains(fallbackThinking.trim().toLowerCase());
+    }
+
+    private boolean isBlank(String s) {
+        return s == null || s.trim().length() == 0; // null, empty and whitespace-only all count as blank
     }
 
     private ChatCompletionResponse mergeSseChunk(ChatCompletionResponse acc, String payload) {
@@ -452,7 +448,7 @@
                         ChatCompletionResponse.Choice choice = new ChatCompletionResponse.Choice();
                         ChatCompletionRequest.Message msg = new ChatCompletionRequest.Message();
                         choice.setMessage(msg);
-                        java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>();
+                        ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>();
                         list.add(choice);
                         acc.setChoices(list);
                     }
@@ -490,7 +486,8 @@
                 if (created != null) acc.setCreated(created);
                 String object = obj.getString("object");
                 if (object != null && !object.isEmpty()) acc.setObjectName(object);
-            } catch (Exception ignore) {}
+            } catch (Exception ignore) {
+            }
         }
         return acc;
     }
@@ -502,7 +499,8 @@
             if (r != null && r.getChoices() != null && !r.getChoices().isEmpty() && r.getChoices().get(0).getMessage() != null) {
                 return r;
             }
-        } catch (Exception ignore) {}
+        } catch (Exception ignore) {
+        }
         ChatCompletionResponse sse = mergeSseChunk(new ChatCompletionResponse(), payload);
         if (sse.getChoices() != null && !sse.getChoices().isEmpty() && sse.getChoices().get(0).getMessage() != null && sse.getChoices().get(0).getMessage().getContent() != null) {
             return sse;
@@ -513,9 +511,75 @@
         msg.setRole("assistant");
         msg.setContent(payload);
         choice.setMessage(msg);
-        java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>();
+        ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>();
         list.add(choice);
         r.setChoices(list);
         return r;
     }
+
+    private static class RawCompletionResult {
+        private final int statusCode; // HTTP status of the reply
+        private final String payload; // response body text
+        // Immutable holder: both values are fixed at construction.
+        private RawCompletionResult(int httpStatus, String bodyText) {
+            this.payload = bodyText;
+            this.statusCode = httpStatus;
+        }
+    }
+
+    private static class LlmRouteException extends RuntimeException {
+        private final int statusCode; // HTTP status returned by the route
+        private final String body; // response body as received; may be null
+        // The exception message carries only the status; the body is kept in its own field.
+        private LlmRouteException(int httpStatus, String responseBody) {
+            super("http status=" + httpStatus);
+            this.body = responseBody;
+            this.statusCode = httpStatus;
+        }
+    }
+
+    private static class ResolvedRoute {
+        private Long id; // left null by fromFallback
+        private String name;
+        private String baseUrl;
+        private String apiKey;
+        private String model;
+        private boolean thinkingEnabled;
+        private boolean switchOnQuota;
+        private boolean switchOnError;
+        private Integer cooldownSeconds;
+        // Builds a route from a stored config row; switch flags that are unset count as enabled.
+        private static ResolvedRoute fromDb(LlmRouteConfig c) {
+            ResolvedRoute route = new ResolvedRoute();
+            route.id = c.getId();
+            route.name = c.getName();
+            route.cooldownSeconds = c.getCooldownSeconds();
+            route.baseUrl = c.getBaseUrl();
+            route.apiKey = c.getApiKey();
+            route.model = c.getModel();
+            route.thinkingEnabled = !(c.getThinking() == null || c.getThinking() != 1);
+            route.switchOnQuota = !(c.getSwitchOnQuota() != null && c.getSwitchOnQuota() != 1);
+            route.switchOnError = !(c.getSwitchOnError() != null && c.getSwitchOnError() != 1);
+            return route;
+        }
+        // Builds the id-less route used for the yml settings: both switch flags on, 300 second cooldown.
+        private static ResolvedRoute fromFallback(String baseUrl, String apiKey, String model, boolean thinkingEnabled) {
+            ResolvedRoute route = new ResolvedRoute();
+            route.name = "fallback-yml";
+            route.cooldownSeconds = 300;
+            route.switchOnQuota = true;
+            route.switchOnError = true;
+            route.thinkingEnabled = thinkingEnabled;
+            route.baseUrl = baseUrl;
+            route.apiKey = apiKey;
+            route.model = model;
+            return route;
+        }
+        // Label used in log messages: the name (or "unnamed") followed by the model when one is set.
+        private String tag() {
+            StringBuilder label = new StringBuilder(name == null ? "unnamed" : name);
+            if (model != null) label.append(" model=").append(model);
+            return label.toString();
+        }
+    }
 }
diff --git a/src/main/java/com/zy/ai/service/LlmRouteConfigService.java b/src/main/java/com/zy/ai/service/LlmRouteConfigService.java
new file mode 100644
index 0000000..4bf082e
--- /dev/null
+++ b/src/main/java/com/zy/ai/service/LlmRouteConfigService.java
@@ -0,0 +1,7 @@
+package com.zy.ai.service;
+
+import com.baomidou.mybatisplus.service.IService;
+import com.zy.ai.entity.LlmRouteConfig;
+
+public interface LlmRouteConfigService extends IService<LlmRouteConfig> { // declares nothing of its own; all operations come from IService
+}
diff --git a/src/main/java/com/zy/ai/service/LlmRoutingService.java b/src/main/java/com/zy/ai/service/LlmRoutingService.java
new file mode 100644
index 0000000..4323d6a
--- /dev/null
+++ b/src/main/java/com/zy/ai/service/LlmRoutingService.java
@@ -0,0 +1,270 @@
+package com.zy.ai.service;
+
+import com.baomidou.mybatisplus.mapper.EntityWrapper;
+import com.zy.ai.entity.LlmRouteConfig;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.MediaType;
+import org.springframework.stereotype.Service;
+import org.springframework.web.reactive.function.client.WebClient;
+import reactor.core.publisher.Mono;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class LlmRoutingService {
+    // Picks usable LLM routes from the stored configuration, records call outcomes and probes connectivity.
+    private static final long CACHE_TTL_MS = 3000L;
+
+    private final LlmRouteConfigService llmRouteConfigService;
+
+    private volatile List<LlmRouteConfig> allRouteCache = Collections.emptyList();
+    private volatile long cacheExpireAt = 0L;
+    /** Expires the cached route list so that the next read reloads it. */
+    public void evictCache() {
+        cacheExpireAt = 0L;
+    }
+    /** Returns a new list holding every configured route, ordered by priority and then id. */
+    public List<LlmRouteConfig> listAllOrdered() {
+        return new ArrayList<>(loadAllRoutes());
+    }
+    /** Returns enabled, fully configured routes that are not cooling down; if none remain, the cooling ones are returned instead. */
+    public List<LlmRouteConfig> listAvailableRoutes() {
+        Date now = new Date();
+        List<LlmRouteConfig> result = new ArrayList<>();
+        List<LlmRouteConfig> coolingRoutes = new ArrayList<>();
+        int total = 0;
+        int disabled = 0;
+        int invalid = 0;
+        for (LlmRouteConfig c : loadAllRoutes()) {
+            total++;
+            if (!isEnabled(c)) {
+                disabled++;
+                continue;
+            }
+            if (isBlank(c.getBaseUrl()) || isBlank(c.getApiKey()) || isBlank(c.getModel())) {
+                invalid++;
+                continue;
+            }
+            if (isCooling(c, now)) {
+                coolingRoutes.add(c);
+                continue;
+            }
+            result.add(c);
+        }
+        if (result.isEmpty() && !coolingRoutes.isEmpty()) {
+            // Degrade gracefully: when every usable route is cooling down, use the cooling routes rather than none.
+            log.warn("LLM 路由均处于冷却，降级启用冷却路由。cooling={}, total={}", coolingRoutes.size(), total);
+            return coolingRoutes;
+        }
+        if (result.isEmpty()) {
+            log.warn("未找到可用 LLM 路由。total={}, disabled={}, invalid={}", total, disabled, invalid);
+        }
+        return result;
+    }
+    /** Records a successful call: bumps the success counter, resets the consecutive-failure counter and evicts the cache. Errors are logged, not thrown. */
+    public void markSuccess(Long routeId) {
+        if (routeId == null) return;
+        try {
+            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
+            if (db == null) return;
+            db.setSuccessCount(nvl(db.getSuccessCount()) + 1);
+            db.setConsecutiveFailCount(0);
+            db.setLastUsedTime(new Date());
+            db.setUpdateTime(new Date());
+            llmRouteConfigService.updateById(db);
+            evictCache();
+        } catch (Exception e) {
+            log.warn("更新路由成功状态失败, routeId={}", routeId, e);
+        }
+    }
+    /** Records a failed call and, when enterCooldown is true, sets cooldownUntil to now plus the effective cooldown. Errors are logged, not thrown. */
+    public void markFailure(Long routeId, String errorText, boolean enterCooldown, Integer cooldownSeconds) {
+        if (routeId == null) return;
+        try {
+            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
+            if (db == null) return;
+            Date now = new Date();
+            db.setFailCount(nvl(db.getFailCount()) + 1);
+            db.setConsecutiveFailCount(nvl(db.getConsecutiveFailCount()) + 1);
+            db.setLastFailTime(now);
+            db.setLastError(trimError(errorText));
+            if (enterCooldown) {
+                int sec = cooldownSeconds != null && cooldownSeconds > 0 // a positive argument wins over the stored value
+                        ? cooldownSeconds
+                        : defaultCooldown(db.getCooldownSeconds());
+                db.setCooldownUntil(new Date(now.getTime() + sec * 1000L));
+            }
+            db.setUpdateTime(now);
+            llmRouteConfigService.updateById(db);
+            evictCache();
+        } catch (Exception e) {
+            log.warn("更新路由失败状态失败, routeId={}", routeId, e);
+        }
+    }
+    /** Returns sec when it is positive, otherwise 300. */
+    private int defaultCooldown(Integer sec) {
+        return sec == null || sec <= 0 ? 300 : sec;
+    }
+    /** Flattens line breaks to spaces and caps the text at 500 characters; null stays null. */
+    private String trimError(String err) {
+        if (err == null) return null;
+        String x = err.replace("\n", " ").replace("\r", " ");
+        return x.length() > 500 ? x.substring(0, 500) : x;
+    }
+    /** Treats a null counter as zero. */
+    private Integer nvl(Integer x) {
+        return x == null ? 0 : x;
+    }
+    /** A route is enabled only when its status equals 1. */
+    private boolean isEnabled(LlmRouteConfig c) {
+        return c != null && c.getStatus() != null && c.getStatus() == 1;
+    }
+    /** A route is cooling while its cooldownUntil lies after now. */
+    private boolean isCooling(LlmRouteConfig c, Date now) {
+        return c != null && c.getCooldownUntil() != null && c.getCooldownUntil().after(now);
+    }
+    /** Returns the cached routes while the cache is fresh; otherwise reloads them inside a synchronized block and restarts the TTL. */
+    private List<LlmRouteConfig> loadAllRoutes() {
+        long now = System.currentTimeMillis();
+        if (now < cacheExpireAt && allRouteCache != null) {
+            return allRouteCache;
+        }
+        synchronized (this) {
+            now = System.currentTimeMillis(); // re-check: another thread may have refreshed while this one waited
+            if (now < cacheExpireAt && allRouteCache != null) {
+                return allRouteCache;
+            }
+            EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
+            wrapper.orderBy("priority", true).orderBy("id", true);
+            List<LlmRouteConfig> list = llmRouteConfigService.selectList(wrapper);
+            allRouteCache = list == null ? Collections.emptyList() : list;
+            cacheExpireAt = System.currentTimeMillis() + CACHE_TTL_MS;
+            return allRouteCache;
+        }
+    }
+    /** Null-safe trim: null becomes the empty string. */
+    private String safe(String s) {
+        return s == null ? "" : s.trim();
+    }
+    /** True for null, empty and whitespace-only strings. */
+    private boolean isBlank(String s) {
+        return s == null || s.trim().isEmpty();
+    }
+    /** Fills defaults for unset fields, stamps updateTime (and createTime when isCreate) and returns the same instance. */
+    public LlmRouteConfig fillAndNormalize(LlmRouteConfig cfg, boolean isCreate) {
+        Date now = new Date();
+        if (isBlank(cfg.getName())) {
+            cfg.setName("LLM_ROUTE_" + now.getTime());
+        }
+        if (cfg.getThinking() == null) {
+            cfg.setThinking((short) 0);
+        }
+        if (cfg.getPriority() == null) {
+            cfg.setPriority(100);
+        }
+        if (cfg.getStatus() == null) {
+            cfg.setStatus((short) 1);
+        }
+        if (cfg.getSwitchOnQuota() == null) {
+            cfg.setSwitchOnQuota((short) 1);
+        }
+        if (cfg.getSwitchOnError() == null) {
+            cfg.setSwitchOnError((short) 1);
+        }
+        if (cfg.getCooldownSeconds() == null || cfg.getCooldownSeconds() <= 0) { // zero included: markFailure applies 300 for any non-positive value
+            cfg.setCooldownSeconds(300);
+        }
+        if (cfg.getFailCount() == null) {
+            cfg.setFailCount(0);
+        }
+        if (cfg.getSuccessCount() == null) {
+            cfg.setSuccessCount(0);
+        }
+        if (cfg.getConsecutiveFailCount() == null) {
+            cfg.setConsecutiveFailCount(0);
+        }
+        if (isCreate) {
+            cfg.setCreateTime(now);
+        }
+        cfg.setUpdateTime(now);
+        return cfg;
+    }
+    /** Probes a route and reports ok, statusCode, latencyMs, message and responseSnippet; exceptions are reported with statusCode -1. */
+    public Map<String, Object> testRoute(LlmRouteConfig cfg) {
+        HashMap<String, Object> result = new HashMap<>();
+        long start = System.currentTimeMillis();
+        try {
+            TestHttpResult raw = testJavaRoute(cfg);
+            fillTestResult(result, raw, start);
+        } catch (Exception e) {
+            result.put("ok", false);
+            result.put("statusCode", -1);
+            result.put("latencyMs", System.currentTimeMillis() - start);
+            result.put("message", "测试异常: " + safe(e.getMessage()));
+            result.put("responseSnippet", "");
+        }
+        return result;
+    }
+    /** Copies a probe outcome into the result map; any 2xx status counts as ok. */
+    private void fillTestResult(HashMap<String, Object> result, TestHttpResult raw, long start) {
+        boolean ok = raw.statusCode >= 200 && raw.statusCode < 300;
+        result.put("ok", ok);
+        result.put("statusCode", raw.statusCode);
+        result.put("latencyMs", System.currentTimeMillis() - start);
+        result.put("message", ok ? "测试成功" : "测试失败");
+        result.put("responseSnippet", trimBody(raw.body));
+    }
+    /** Posts a minimal non-streaming "ping" completion to the route with a 12 second timeout; errors in the call map to status -1. */
+    private TestHttpResult testJavaRoute(LlmRouteConfig cfg) {
+        HashMap<String, Object> req = new HashMap<>();
+        req.put("model", cfg.getModel());
+        List<Map<String, String>> messages = new ArrayList<>();
+        HashMap<String, String> msg = new HashMap<>();
+        msg.put("role", "user");
+        msg.put("content", "ping");
+        messages.add(msg);
+        req.put("messages", messages);
+        req.put("stream", false);
+        req.put("max_tokens", 8);
+        req.put("temperature", 0);
+        // The status code and body are captured for any HTTP status, not only 2xx.
+        WebClient client = WebClient.builder().baseUrl(cfg.getBaseUrl()).build();
+        return client.post()
+                .uri("/chat/completions")
+                .header(HttpHeaders.AUTHORIZATION, "Bearer " + cfg.getApiKey())
+                .contentType(MediaType.APPLICATION_JSON)
+                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
+                .bodyValue(req)
+                .exchangeToMono(resp -> resp.bodyToMono(String.class)
+                        .defaultIfEmpty("")
+                        .map(body -> new TestHttpResult(resp.rawStatusCode(), body)))
+                .timeout(Duration.ofSeconds(12))
+                .onErrorResume(ex -> Mono.just(new TestHttpResult(-1, safe(ex.getMessage()))))
+                .block();
+    }
+    /** Trims the body, flattens line breaks to spaces and caps it at 300 characters. */
+    private String trimBody(String body) {
+        String x = safe(body).replace("\r", " ").replace("\n", " ");
+        return x.length() > 300 ? x.substring(0, 300) : x;
+    }
+    /** Immutable status/body pair produced by a route probe. */
+    private static class TestHttpResult {
+        private final int statusCode;
+        private final String body;
+
+        private TestHttpResult(int statusCode, String body) {
+            this.statusCode = statusCode;
+            this.body = body;
+        }
+    }
+}
diff --git a/src/main/java/com/zy/ai/service/PythonService.java b/src/main/java/com/zy/ai/service/PythonService.java
deleted file mode 100644
index adc712b..0000000
--- a/src/main/java/com/zy/ai/service/PythonService.java
+++ /dev/null
@@ -1,55 +0,0 @@
-package com.zy.ai.service;
-
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Component;
-import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
-
-@Slf4j
-@Component
-public class PythonService {
-
-    @Autowired
-    private LlmChatService llmChatService;
-
-    public boolean runPython(String prompt, String chatId, SseEmitter emitter) {
-        try {
-            llmChatService.chatStreamRunPython(prompt, chatId, s -> {
-                try {
-                    String safe = s == null ? "" : s.replace("\r", "").replace("\n", "\\n");
-                    if (!safe.isEmpty()) {
-                        sse(emitter, safe);
-                    }
-                } catch (Exception ignore) {
-                }
-            }, () -> {
-                try {
-                    sse(emitter, "\\n\\n銆怉I銆戣繍琛屽凡鍋滄锛堟甯哥粨鏉燂級\\n\\n");
-                    log.info("AI MCP diagnose stopped: final end");
-                    emitter.complete();
-                } catch (Exception ignore) {
-                }
-            }, e -> {
-                sse(emitter, "\\n\\n銆怉I銆戝垎鏋愬嚭閿欙紝姝e湪鍥為��...\\n\\n");
-            });
-            return true;
-        } catch (Exception e) {
-            try {
-                sse(emitter, "\\n\\n銆怉I銆戣繍琛屽凡鍋滄锛堝紓甯革級\\n\\n");
-                log.error("AI MCP diagnose stopped: error", e);
-                emitter.completeWithError(e);
-            } catch (Exception ignore) {}
-            return true;
-        }
-    }
-
-    private void sse(SseEmitter emitter, String data) {
-        if (data == null) return;
-        try {
-            emitter.send(SseEmitter.event().data(data));
-        } catch (Exception e) {
-            log.warn("SSE send failed", e);
-        }
-    }
-
-}
diff --git a/src/main/java/com/zy/ai/service/WcsDiagnosisService.java b/src/main/java/com/zy/ai/service/WcsDiagnosisService.java
index 189e57d..45dea58 100644
--- a/src/main/java/com/zy/ai/service/WcsDiagnosisService.java
+++ b/src/main/java/com/zy/ai/service/WcsDiagnosisService.java
@@ -11,7 +11,6 @@
 import com.zy.common.utils.RedisUtil;
 import com.zy.core.enums.RedisKeyType;
 import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Value;
 import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
@@ -28,8 +27,6 @@
 
     private static final long CHAT_TTL_SECONDS = 7L * 24 * 3600;
 
-    @Value("${llm.platform}")
-    private String platform;
     @Autowired
     private LlmChatService llmChatService;
     @Autowired
@@ -40,8 +37,6 @@
     private AiUtils aiUtils;
     @Autowired(required = false)
     private McpController mcpController;
-    @Autowired
-    private PythonService pythonService;
 
     public void diagnoseStream(WcsDiagnosisRequest request, SseEmitter emitter) {
         List<ChatCompletionRequest.Message> messages = new ArrayList<>();
@@ -85,7 +80,7 @@
             try {
                 try { emitter.send(SseEmitter.event().data("銆怉I銆戣繍琛屽凡鍋滄锛堝紓甯革級")); } catch (Exception ignore) {}
                 log.error("AI diagnose stream stopped: error", e);
-                emitter.completeWithError(e);
+                emitter.complete();
             } catch (Exception ignore) {}
         });
     }
@@ -95,11 +90,6 @@
                           String chatId,
                           boolean reset,
                           SseEmitter emitter) {
-        if (platform.equals("python")) {
-            pythonService.runPython(prompt, chatId, emitter);
-            return;
-        }
-
         List<ChatCompletionRequest.Message> messages = new ArrayList<>();
 
         List<ChatCompletionRequest.Message> history = null;
@@ -187,7 +177,10 @@
                 emitter.complete();
             } catch (Exception ignore) {}
         }, e -> {
-            try { emitter.completeWithError(e); } catch (Exception ignore) {}
+            try {
+                try { emitter.send(SseEmitter.event().data("銆怉I銆戣繍琛屽凡鍋滄锛堝紓甯革級")); } catch (Exception ignore) {}
+                emitter.complete();
+            } catch (Exception ignore) {}
         });
     }
 
@@ -380,7 +373,7 @@
             try {
                 sse(emitter, "\\n\\n銆怉I銆戣繍琛屽凡鍋滄锛堝紓甯革級\\n\\n");
                 log.error("AI MCP diagnose stopped: error", e);
-                emitter.completeWithError(e);
+                emitter.complete();
             } catch (Exception ignore) {}
             return true;
         }
diff --git a/src/main/java/com/zy/ai/service/impl/LlmRouteConfigServiceImpl.java b/src/main/java/com/zy/ai/service/impl/LlmRouteConfigServiceImpl.java
new file mode 100644
index 0000000..0315c11
--- /dev/null
+++ b/src/main/java/com/zy/ai/service/impl/LlmRouteConfigServiceImpl.java
@@ -0,0 +1,11 @@
+package com.zy.ai.service.impl;
+
+import com.baomidou.mybatisplus.service.impl.ServiceImpl;
+import com.zy.ai.entity.LlmRouteConfig;
+import com.zy.ai.mapper.LlmRouteConfigMapper;
+import com.zy.ai.service.LlmRouteConfigService;
+import org.springframework.stereotype.Service;
+
+@Service("llmRouteConfigService") // registered under an explicit bean name
+public class LlmRouteConfigServiceImpl extends ServiceImpl<LlmRouteConfigMapper, LlmRouteConfig> implements LlmRouteConfigService {
+} // no members of its own: everything is inherited from ServiceImpl
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
index fe77635..116f9b4 100644
--- a/src/main/resources/application.yml
+++ b/src/main/resources/application.yml
@@ -1,6 +1,6 @@
 # 系统版本信息
 app:
-  version: 1.0.4.2
+  version: 1.0.4.3
   version-type: dev  # prd 或 dev
 
 server:
@@ -83,27 +83,15 @@
   expireDays: 7
 
 llm:
-  platform: java
-  pythonPlatformUrl: http://127.0.0.1:9000/ai/diagnose/askStream
-  thinking: enable
-#  base-url: https://api.siliconflow.cn/v1
-#  api-key: sk-sxdtebtquwrugzrmaqqqkzdzmrgzhzmplwwuowysdasccent
-#  model: deepseek-ai/DeepSeek-V3.2
-#  base-url: http://47.76.147.249:9998/e/7g7kqxxt1ei2un71
-#  api-key: app-mP0O6aY5WpbfaHs7BNnjVkli
-#  model: deepseek-ai/DeepSeek-V3.2
-#  base-url: http://34.2.134.223:3000/v1
-#  api-key: sk-WabrmtOezCFwVo7XvVOrO3QkmfcKG7T7jy0BaVnmQTWm5GXh
-#  model: gemini-3-pro-preview
-#  base-url: http://127.0.0.1:8317/v1
-#  api-key: WznOjAGJNVFKSe9kBZTr
-#  model: gpt-5
-  base-url: https://api.xiaomimimo.com/v1
-  api-key: sk-cw7e4se9cal8cxdgjml8dmtn4pdmqtvfccg5fcermt0ddtys
-  model: mimo-v2-flash
+  # 现已迁移到数据库表 sys_llm_route 维护（支持多API/多模型/多Key自动切换）
+  # 以下仅作为数据库为空时的兼容回退配置
+  thinking: false
+  base-url:
+  api-key:
+  model:
 
 perf:
   methodTiming:
     enabled: false
     thresholdMs: 50
-    sampleRate: 1.0
\ No newline at end of file
+    sampleRate: 1.0
diff --git a/src/main/resources/mapper/LlmRouteConfigMapper.xml b/src/main/resources/mapper/LlmRouteConfigMapper.xml
new file mode 100644
index 0000000..1930a0c
--- /dev/null
+++ b/src/main/resources/mapper/LlmRouteConfigMapper.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.zy.ai.mapper.LlmRouteConfigMapper">
+    <!-- Maps snake_case result columns onto the camelCase properties of LlmRouteConfig; this mapper file declares no SQL statements of its own. -->
+    <resultMap id="BaseResultMap" type="com.zy.ai.entity.LlmRouteConfig">
+        <id column="id" property="id"/>
+        <result column="name" property="name"/>
+        <result column="base_url" property="baseUrl"/>
+        <result column="api_key" property="apiKey"/>
+        <result column="model" property="model"/>
+        <result column="thinking" property="thinking"/>
+        <result column="priority" property="priority"/>
+        <result column="status" property="status"/>
+        <result column="switch_on_quota" property="switchOnQuota"/>
+        <result column="switch_on_error" property="switchOnError"/>
+        <result column="cooldown_seconds" property="cooldownSeconds"/>
+        <result column="cooldown_until" property="cooldownUntil"/>
+        <result column="fail_count" property="failCount"/>
+        <result column="success_count" property="successCount"/>
+        <result column="consecutive_fail_count" property="consecutiveFailCount"/>
+        <result column="last_error" property="lastError"/>
+        <result column="last_used_time" property="lastUsedTime"/>
+        <result column="last_fail_time" property="lastFailTime"/>
+        <result column="create_time" property="createTime"/>
+        <result column="update_time" property="updateTime"/>
+        <result column="memo" property="memo"/>
+    </resultMap>
+
+</mapper>
diff --git a/src/main/resources/sql/20260303_add_ai_config_menu.sql b/src/main/resources/sql/20260303_add_ai_config_menu.sql
new file mode 100644
index 0000000..58c9a97
--- /dev/null
+++ b/src/main/resources/sql/20260303_add_ai_config_menu.sql
@@ -0,0 +1,46 @@
+-- Mounts the "AI配置" (AI config) menu under: 开发专用 (developer-only) -> AI配置
+-- Note: menus in this system come from sys_resource; after running this script, tick the new menu for the relevant roles under "角色授权" (role authorization).
+
+-- 1) Locate the level-1 menu "开发专用"; when it does not exist the variable stays NULL and both inserts below are skipped
+SET @dev_parent_id := (
+  SELECT id
+  FROM sys_resource
+  WHERE name = '开发专用' AND level = 1
+  ORDER BY id
+  LIMIT 1
+);
+
+-- 2) Add the level-2 menu "AI配置" (page); the NOT EXISTS guard makes the script safe to re-run
+INSERT INTO sys_resource(code, name, resource_id, level, sort, status)
+SELECT 'ai/llm_config.html', 'AI配置', @dev_parent_id, 2, 999, 1
+FROM dual
+WHERE @dev_parent_id IS NOT NULL
+  AND NOT EXISTS (
+    SELECT 1
+    FROM sys_resource
+    WHERE code = 'ai/llm_config.html' AND level = 2
+  );
+
+-- 3) Add the level-3 button permission "查看" (view), used for fine-grained role authorization
+SET @ai_cfg_id := (
+  SELECT id
+  FROM sys_resource
+  WHERE code = 'ai/llm_config.html' AND level = 2
+  ORDER BY id
+  LIMIT 1
+);
+
+INSERT INTO sys_resource(code, name, resource_id, level, sort, status)
+SELECT 'ai/llm_config.html#view', '查看', @ai_cfg_id, 3, 1, 1
+FROM dual
+WHERE @ai_cfg_id IS NOT NULL
+  AND NOT EXISTS (
+    SELECT 1
+    FROM sys_resource
+    WHERE code = 'ai/llm_config.html#view' AND level = 3
+  );
+
+-- Optional check: list the rows this script is expected to have created
+SELECT id, code, name, resource_id, level, sort, status
+FROM sys_resource
+WHERE code IN ('ai/llm_config.html', 'ai/llm_config.html#view');
diff --git a/src/main/resources/sql/20260303_create_sys_llm_route.sql b/src/main/resources/sql/20260303_create_sys_llm_route.sql
new file mode 100644
index 0000000..47d67ff
--- /dev/null
+++ b/src/main/resources/sql/20260303_create_sys_llm_route.sql
@@ -0,0 +1,32 @@
+CREATE TABLE IF NOT EXISTS `sys_llm_route` (
+  `id` BIGINT NOT NULL AUTO_INCREMENT COMMENT '主键',
+  `name` VARCHAR(64) NOT NULL COMMENT '路由名称',
+  `base_url` VARCHAR(255) DEFAULT NULL COMMENT 'LLM API Base URL',
+  `api_key` VARCHAR(512) DEFAULT NULL COMMENT 'API Key',
+  `model` VARCHAR(128) DEFAULT NULL COMMENT '模型名',
+  `thinking` TINYINT NOT NULL DEFAULT 0 COMMENT '是否开启深度思考:1是0否',
+  `priority` INT NOT NULL DEFAULT 100 COMMENT '优先级(越小越优先)',
+  `status` TINYINT NOT NULL DEFAULT 1 COMMENT '状态:1启用0禁用',
+  `switch_on_quota` TINYINT NOT NULL DEFAULT 1 COMMENT '额度耗尽时是否切换',
+  `switch_on_error` TINYINT NOT NULL DEFAULT 1 COMMENT '故障时是否切换',
+  `cooldown_seconds` INT NOT NULL DEFAULT 300 COMMENT '故障后冷却秒数',
+  `cooldown_until` DATETIME DEFAULT NULL COMMENT '冷却截止时间',
+  `fail_count` INT NOT NULL DEFAULT 0 COMMENT '总失败次数',
+  `success_count` INT NOT NULL DEFAULT 0 COMMENT '总成功次数',
+  `consecutive_fail_count` INT NOT NULL DEFAULT 0 COMMENT '连续失败次数',
+  `last_error` VARCHAR(512) DEFAULT NULL COMMENT '最近错误摘要',
+  `last_used_time` DATETIME DEFAULT NULL COMMENT '最近成功调用时间',
+  `last_fail_time` DATETIME DEFAULT NULL COMMENT '最近失败时间',
+  `create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+  `update_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+  `memo` VARCHAR(255) DEFAULT NULL COMMENT '备注',
+  PRIMARY KEY (`id`),
+  KEY `idx_sys_llm_route_status_priority` (`status`, `priority`),
+  KEY `idx_sys_llm_route_cooldown` (`cooldown_until`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='LLM 路由配置(多API/多模型/多Key自动切换)';
+
+-- 示例数据（按需修改后执行）
+-- INSERT INTO sys_llm_route(name, base_url, api_key, model, thinking, priority, status, switch_on_quota, switch_on_error, cooldown_seconds)
+-- VALUES ('主路由-gpt5', 'https://api.xiaomimimo.com/v1', 'sk-xxxx', 'gpt-5', 1, 10, 1, 1, 1, 300);
+-- INSERT INTO sys_llm_route(name, base_url, api_key, model, thinking, priority, status, switch_on_quota, switch_on_error, cooldown_seconds)
+-- VALUES ('备路由-mimo', 'https://api.xiaomimimo.com/v1', 'sk-yyyy', 'mimo-v2-flash', 0, 20, 1, 1, 1, 300);
diff --git a/src/main/webapp/views/ai/llm_config.html b/src/main/webapp/views/ai/llm_config.html
new file mode 100644
index 0000000..dbbbc6a
--- /dev/null
+++ b/src/main/webapp/views/ai/llm_config.html
@@ -0,0 +1,421 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>AI配置</title>
+  <link rel="stylesheet" href="../../static/vue/element/element.css" />
+  <style>
+    body {
+      margin: 0;
+      background:
+        radial-gradient(1200px 500px at 10% -10%, rgba(26, 115, 232, 0.14), transparent 50%),
+        radial-gradient(900px 450px at 100% 0%, rgba(38, 166, 154, 0.11), transparent 55%),
+        #f4f7fb;
+    }
+    .container {
+      max-width: 1640px;
+      margin: 16px auto;
+      padding: 0 14px;
+    }
+    .hero {
+      background: linear-gradient(135deg, #0f4c81 0%, #1f6fb2 45%, #2aa198 100%);
+      color: #fff;
+      border-radius: 14px;
+      padding: 14px 16px;
+      margin-bottom: 10px;
+      box-shadow: 0 10px 28px rgba(23, 70, 110, 0.22);
+    }
+    .hero-top {
+      display: flex;
+      align-items: center;
+      justify-content: space-between;
+      gap: 10px;
+    }
+    .hero-title {
+      display: flex;
+      align-items: center;
+      gap: 10px;
+    }
+    .hero-title .main {
+      font-size: 16px;
+      font-weight: 700;
+      letter-spacing: 0.2px;
+    }
+    .hero-title .sub {
+      font-size: 12px;
+      opacity: 0.9;
+    }
+    .summary-grid {
+      margin-top: 10px;
+      display: grid;
+      grid-template-columns: repeat(5, minmax(0, 1fr));
+      gap: 8px;
+    }
+    .summary-card {
+      border-radius: 10px;
+      background: rgba(255, 255, 255, 0.16);
+      border: 1px solid rgba(255, 255, 255, 0.24);
+      padding: 8px 10px;
+      min-height: 56px;
+      backdrop-filter: blur(3px);
+    }
+    .summary-card .k {
+      font-size: 11px;
+      opacity: 0.88;
+    }
+    .summary-card .v {
+      margin-top: 4px;
+      font-size: 22px;
+      font-weight: 700;
+      line-height: 1.1;
+    }
+    .table-shell {
+      border-radius: 12px;
+      overflow: hidden;
+      box-shadow: 0 6px 22px rgba(15, 28, 48, 0.08);
+      border: 1px solid #e8edf5;
+      background: #fff;
+    }
+    .mono {
+      font-family: Menlo, Monaco, Consolas, "Liberation Mono", monospace;
+      font-size: 12px;
+    }
+    @media (max-width: 1280px) {
+      .summary-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
+    }
+  </style>
+</head>
+<body>
+<div id="app" class="container">
+  <div class="hero">
+    <div class="hero-top">
+      <div class="hero-title">
+        <div v-html="headerIcon" style="display:flex;"></div>
+        <div>
+          <div class="main">AI配置 - LLM路由</div>
+          <div class="sub">支持多API、多模型、多Key，额度耗尽或故障自动切换</div>
+        </div>
+      </div>
+      <div>
+        <el-button type="primary" size="mini" @click="addRoute">新增路由</el-button>
+        <el-button size="mini" @click="loadRoutes">刷新</el-button>
+      </div>
+    </div>
+    <div class="summary-grid">
+      <div class="summary-card">
+        <div class="k">总路由</div>
+        <div class="v">{{ summary.total }}</div>
+      </div>
+      <div class="summary-card">
+        <div class="k">启用</div>
+        <div class="v">{{ summary.enabled }}</div>
+      </div>
+      <div class="summary-card">
+        <div class="k">故障切换开启</div>
+        <div class="v">{{ summary.errorSwitch }}</div>
+      </div>
+      <div class="summary-card">
+        <div class="k">额度切换开启</div>
+        <div class="v">{{ summary.quotaSwitch }}</div>
+      </div>
+      <div class="summary-card">
+        <div class="k">冷却中</div>
+        <div class="v">{{ summary.cooling }}</div>
+      </div>
+    </div>
+  </div>
+  <!-- Editable route table: each input binds directly to its row object; edits are only sent to the server through the per-row "保存" command. -->
+  <div class="table-shell">
+    <el-table :data="routes" stripe height="72vh" v-loading="loading" :header-cell-style="{background:'#f7f9fc', color:'#2e3a4d', fontWeight:600}">
+      <el-table-column label="名称" width="170">
+        <template slot-scope="scope">
+          <el-input v-model="scope.row.name" size="mini"></el-input>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="Base URL" min-width="220">
+        <template slot-scope="scope">
+          <el-input v-model="scope.row.baseUrl" class="mono" size="mini" placeholder="必填，例如: https://api.deepseek.com"></el-input>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="模型" width="180">
+        <template slot-scope="scope">
+          <el-input v-model="scope.row.model" class="mono" size="mini" placeholder="必填，例如: deepseek-chat"></el-input>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="API Key" min-width="220">
+        <template slot-scope="scope">
+          <el-input v-model="scope.row.apiKey" class="mono" type="password" size="mini" placeholder="必填"></el-input>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="优先级" width="90">
+        <template slot-scope="scope">
+          <el-input-number v-model="scope.row.priority" size="mini" :min="0" :max="99999" :controls="false" style="width:80px;"></el-input-number>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="状态" width="70">
+        <template slot-scope="scope">
+          <el-switch v-model="scope.row.status" :active-value="1" :inactive-value="0"></el-switch>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="思考" width="70">
+        <template slot-scope="scope">
+          <el-switch v-model="scope.row.thinking" :active-value="1" :inactive-value="0"></el-switch>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="额度切换" width="90">
+        <template slot-scope="scope">
+          <el-switch v-model="scope.row.switchOnQuota" :active-value="1" :inactive-value="0"></el-switch>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="故障切换" width="90">
+        <template slot-scope="scope">
+          <el-switch v-model="scope.row.switchOnError" :active-value="1" :inactive-value="0"></el-switch>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="冷却秒数" width="100">
+        <template slot-scope="scope">
+          <el-input-number v-model="scope.row.cooldownSeconds" size="mini" :min="0" :max="86400" :controls="false" style="width:90px;"></el-input-number>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="统计" min-width="220">
+        <template slot-scope="scope">
+          <div>成功: {{ scope.row.successCount || 0 }} / 失败: {{ scope.row.failCount || 0 }} / 连续失败: {{ scope.row.consecutiveFailCount || 0 }}</div>
+          <div style="color:#909399;">冷却到: {{ scope.row.cooldownUntil || '-' }}</div>
+          <div style="color:#909399;">最近错误: {{ scope.row.lastError || '-' }}</div>
+        </template>
+      </el-table-column>
+
+      <el-table-column label="操作" width="120" fixed="right" align="center">
+        <template slot-scope="scope">
+          <el-dropdown trigger="click" @command="function(cmd){ handleRouteCommand(cmd, scope.row, scope.$index); }">
+            <el-button size="mini" type="primary" plain>
+              操作<i class="el-icon-arrow-down el-icon--right"></i>
+            </el-button>
+            <el-dropdown-menu slot="dropdown">
+              <el-dropdown-item command="test" :disabled="scope.row.__testing === true">
+                {{ scope.row.__testing === true ? '测试中...' : '测试' }}
+              </el-dropdown-item>
+              <el-dropdown-item command="save">保存</el-dropdown-item>
+              <el-dropdown-item command="cooldown">清冷却</el-dropdown-item>
+              <el-dropdown-item command="delete" divided>删除</el-dropdown-item>
+            </el-dropdown-menu>
+          </el-dropdown>
+        </template>
+      </el-table-column>
+    </el-table>
+  </div>
+</div>
+
+<script type="text/javascript" src="../../static/vue/js/vue.min.js"></script>
+<script type="text/javascript" src="../../static/vue/element/element.js"></script>
+<script type="text/javascript" src="../../static/js/common.js" charset="utf-8"></script>
+<script>
+  new Vue({
+    el: '#app',
+    data: function() {
+      return {
+        headerIcon: getAiIconHtml(34, 34),
+        loading: false,
+        routes: []
+      };
+    },
+    computed: {
+      summary: function() {
+        var now = Date.now();
+        var total = this.routes.length;
+        var enabled = 0, quotaSwitch = 0, errorSwitch = 0, cooling = 0;
+        for (var i = 0; i < this.routes.length; i++) {
+          var x = this.routes[i];
+          if (x.status === 1) enabled++;
+          if (x.switchOnQuota === 1) quotaSwitch++;
+          if (x.switchOnError === 1) errorSwitch++;
+          if (x.cooldownUntil && new Date(x.cooldownUntil).getTime() > now) cooling++;
+        }
+        return { total: total, enabled: enabled, quotaSwitch: quotaSwitch, errorSwitch: errorSwitch, cooling: cooling };
+      }
+    },
+    methods: {
+      authHeaders: function() {
+        return { 'token': localStorage.getItem('token') };
+      },
+      handleRouteCommand: function(command, route, idx) {
+        if (command === 'test') return this.testRoute(route);
+        if (command === 'save') return this.saveRoute(route);
+        if (command === 'cooldown') return this.clearCooldown(route);
+        if (command === 'delete') return this.deleteRoute(route, idx);
+      },
+      loadRoutes: function() {
+        var self = this;
+        self.loading = true;
+        fetch(baseUrl + '/ai/llm/config/list/auth', { headers: self.authHeaders() })
+          .then(function(r){ return r.json(); })
+          .then(function(res){
+            self.loading = false;
+            if (res && res.code === 200) {
+              self.routes = Array.isArray(res.data) ? res.data : [];
+            } else {
+              self.$message.error((res && res.msg) ? res.msg : '鍔犺浇澶辫触');
+            }
+          })
+          .catch(function(){
+            self.loading = false;
+            self.$message.error('鍔犺浇澶辫触');
+          });
+      },
+      addRoute: function() {
+        this.routes.unshift({
+          id: null,
+          name: '',
+          baseUrl: '',
+          apiKey: '',
+          model: '',
+          thinking: 0,
+          priority: 100,
+          status: 1,
+          switchOnQuota: 1,
+          switchOnError: 1,
+          cooldownSeconds: 300,
+          successCount: 0,
+          failCount: 0,
+          consecutiveFailCount: 0,
+          cooldownUntil: null,
+          lastError: null
+        });
+      },
+      buildPayload: function(route) {
+        return {
+          id: route.id,
+          name: route.name,
+          baseUrl: route.baseUrl,
+          apiKey: route.apiKey,
+          model: route.model,
+          thinking: route.thinking,
+          priority: route.priority,
+          status: route.status,
+          switchOnQuota: route.switchOnQuota,
+          switchOnError: route.switchOnError,
+          cooldownSeconds: route.cooldownSeconds,
+          memo: route.memo
+        };
+      },
+      saveRoute: function(route) {
+        var self = this;
+        fetch(baseUrl + '/ai/llm/config/save/auth', {
+          method: 'POST',
+          headers: Object.assign({ 'Content-Type': 'application/json' }, self.authHeaders()),
+          body: JSON.stringify(self.buildPayload(route))
+        })
+          .then(function(r){ return r.json(); })
+          .then(function(res){
+            if (res && res.code === 200) {
+              self.$message.success('淇濆瓨鎴愬姛');
+              self.loadRoutes();
+            } else {
+              self.$message.error((res && res.msg) ? res.msg : '淇濆瓨澶辫触');
+            }
+          })
+          .catch(function(){
+            self.$message.error('淇濆瓨澶辫触');
+          });
+      },
+      deleteRoute: function(route, idx) {
+        var self = this;
+        if (!route.id) {
+          self.routes.splice(idx, 1);
+          return;
+        }
+        self.$confirm('纭畾鍒犻櫎璇ヨ矾鐢卞悧锛�', '鎻愮ず', { type: 'warning' }).then(function() {
+        fetch(baseUrl + '/ai/llm/config/delete/auth?id=' + encodeURIComponent(route.id), {
+          method: 'POST',
+          headers: self.authHeaders()
+        })
+          .then(function(r){ return r.json(); })
+          .then(function(res){
+            if (res && res.code === 200) {
+              self.$message.success('鍒犻櫎鎴愬姛');
+              self.loadRoutes();
+            } else {
+              self.$message.error((res && res.msg) ? res.msg : '鍒犻櫎澶辫触');
+            }
+          })
+          .catch(function(){
+            self.$message.error('鍒犻櫎澶辫触');
+          });
+        }).catch(function(){});
+      },
+      clearCooldown: function(route) {
+        var self = this;
+        if (!route.id) return;
+        fetch(baseUrl + '/ai/llm/config/clearCooldown/auth?id=' + encodeURIComponent(route.id), {
+          method: 'POST',
+          headers: self.authHeaders()
+        })
+          .then(function(r){ return r.json(); })
+          .then(function(res){
+            if (res && res.code === 200) {
+              self.$message.success('宸叉竻闄ゅ喎鍗�');
+              self.loadRoutes();
+            } else {
+              self.$message.error((res && res.msg) ? res.msg : '鎿嶄綔澶辫触');
+            }
+          })
+          .catch(function(){
+            self.$message.error('鎿嶄綔澶辫触');
+          });
+      },
+      testRoute: function(route) {
+        var self = this;
+        if (route.__testing === true) return;
+        if (!route.id) {
+          self.$message.warning('褰撳墠鏄湭淇濆瓨閰嶇疆锛屾祴璇曢�氳繃鍚庝粛闇�鍏堜繚瀛樻墠浼氱敓鏁�');
+        }
+        self.$set(route, '__testing', true);
+        fetch(baseUrl + '/ai/llm/config/test/auth', {
+          method: 'POST',
+          headers: Object.assign({ 'Content-Type': 'application/json' }, self.authHeaders()),
+          body: JSON.stringify(self.buildPayload(route))
+        })
+          .then(function(r){ return r.json(); })
+          .then(function(res){
+            if (!res || res.code !== 200) {
+              self.$message.error((res && res.msg) ? res.msg : '娴嬭瘯澶辫触');
+              return;
+            }
+            var data = res.data || {};
+            var ok = data.ok === true;
+            var title = ok ? '娴嬭瘯鎴愬姛' : '娴嬭瘯澶辫触';
+            var msg = ''
+              + '璺敱: ' + (route.name || '-') + '\n'
+              + 'Base URL: ' + (route.baseUrl || '-') + '\n'
+              + '鐘舵�佺爜: ' + (data.statusCode != null ? data.statusCode : '-') + '\n'
+              + '鑰楁椂: ' + (data.latencyMs != null ? data.latencyMs : '-') + ' ms\n'
+              + '缁撴灉: ' + (data.message || '-') + '\n'
+              + '杩斿洖鐗囨: ' + (data.responseSnippet || '-');
+            self.$alert(msg, title, { confirmButtonText: '纭畾', type: ok ? 'success' : 'error' });
+          })
+          .catch(function(){
+            self.$message.error('娴嬭瘯澶辫触');
+          })
+          .finally(function(){
+            self.$set(route, '__testing', false);
+          });
+      }
+    },
+    mounted: function() {
+      this.loadRoutes();
+    }
+  });
+</script>
+</body>
+</html>

--
Gitblit v1.9.1