src/main/java/com/zy/ai/config/LlmConfig.java
File was deleted src/main/java/com/zy/ai/controller/LlmRouteConfigController.java
New file @@ -0,0 +1,131 @@
package com.zy.ai.controller;

import com.baomidou.mybatisplus.mapper.EntityWrapper;
import com.core.annotations.ManagerAuth;
import com.core.common.R;
import com.zy.ai.entity.LlmRouteConfig;
import com.zy.ai.service.LlmRouteConfigService;
import com.zy.ai.service.LlmRoutingService;
import com.zy.common.web.BaseController;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.*;

import java.util.Date;
import java.util.List;
import java.util.Map;

/**
 * Management endpoints for LLM route configurations (CRUD, cooldown reset, connectivity test).
 *
 * <p>Every mutating endpoint calls {@link LlmRoutingService#evictCache()} afterwards so the
 * routing layer's short-lived route cache picks up the change immediately.
 *
 * <p>NOTE(review): {@code list()} returns entities including {@code apiKey} in plaintext to the
 * frontend — confirm this is acceptable for the admin console, or mask before returning.
 */
@RestController
@RequestMapping("/ai/llm/config")
@RequiredArgsConstructor
public class LlmRouteConfigController extends BaseController {

    private final LlmRouteConfigService llmRouteConfigService;
    private final LlmRoutingService llmRoutingService;

    /**
     * Lists all route configurations ordered by ascending priority (lower number first),
     * then by id for a stable order among equal priorities.
     */
    @GetMapping("/list/auth")
    @ManagerAuth
    public R list() {
        EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
        wrapper.orderBy("priority", true).orderBy("id", true);
        List<LlmRouteConfig> list = llmRouteConfigService.selectList(wrapper);
        return R.ok(list);
    }

    /**
     * Creates (id == null) or updates (id != null) a route configuration.
     *
     * <p>baseUrl / apiKey / model are mandatory. On update, runtime statistics fields and
     * createTime are copied back from the database row so a stale frontend payload cannot
     * overwrite them.
     *
     * @param config the route configuration from the request body
     * @return the saved configuration, or an error result on validation failure
     */
    @PostMapping("/save/auth")
    @ManagerAuth
    public R save(@RequestBody LlmRouteConfig config) {
        if (config == null) {
            return R.error("参数不能为空");
        }
        if (isBlank(config.getBaseUrl()) || isBlank(config.getApiKey()) || isBlank(config.getModel())) {
            return R.error("必须填写 baseUrl/apiKey/model");
        }
        if (config.getId() == null) {
            // Create path: fill defaults (priority, status, counters, timestamps) then insert.
            llmRoutingService.fillAndNormalize(config, true);
            llmRouteConfigService.insert(config);
        } else {
            LlmRouteConfig db = llmRouteConfigService.selectById(config.getId());
            if (db == null) {
                return R.error("配置不存在");
            }
            // Preserve statistics fields so the frontend cannot accidentally overwrite them.
            // NOTE(review): cooldownUntil is NOT preserved here — a stale payload could clear
            // or alter an active cooldown; confirm whether that is intended.
            Integer failCount = db.getFailCount();
            Integer successCount = db.getSuccessCount();
            Integer consecutiveFailCount = db.getConsecutiveFailCount();
            Date lastFailTime = db.getLastFailTime();
            Date lastUsedTime = db.getLastUsedTime();
            String lastError = db.getLastError();
            llmRoutingService.fillAndNormalize(config, false);
            config.setFailCount(failCount);
            config.setSuccessCount(successCount);
            config.setConsecutiveFailCount(consecutiveFailCount);
            config.setLastFailTime(lastFailTime);
            config.setLastUsedTime(lastUsedTime);
            config.setLastError(lastError);
            // Keep the original creation timestamp; fillAndNormalize only refreshes updateTime.
            config.setCreateTime(db.getCreateTime());
            llmRouteConfigService.updateById(config);
        }
        llmRoutingService.evictCache();
        return R.ok(config);
    }

    /**
     * Deletes a route configuration by id and evicts the routing cache.
     * Deleting a non-existent id is silently treated as success.
     */
    @PostMapping("/delete/auth")
    @ManagerAuth
    public R delete(@RequestParam("id") Long id) {
        if (id == null) {
            return R.error("id不能为空");
        }
        llmRouteConfigService.deleteById(id);
        llmRoutingService.evictCache();
        return R.ok();
    }

    /**
     * Manually clears a route's cooldown window and consecutive-failure counter,
     * making it immediately eligible for selection again.
     */
    @PostMapping("/clearCooldown/auth")
    @ManagerAuth
    public R clearCooldown(@RequestParam("id") Long id) {
        if (id == null) {
            return R.error("id不能为空");
        }
        LlmRouteConfig cfg = llmRouteConfigService.selectById(id);
        if (cfg == null) {
            return R.error("配置不存在");
        }
        cfg.setCooldownUntil(null);
        cfg.setConsecutiveFailCount(0);
        cfg.setUpdateTime(new Date());
        llmRouteConfigService.updateById(cfg);
        llmRoutingService.evictCache();
        return R.ok();
    }

    /**
     * Tests connectivity of a route (the posted config, not necessarily a saved one).
     *
     * <p>If the test succeeds ("ok" == true in the result map) and the config refers to an
     * existing saved route, that route's cooldown and consecutive-failure counter are reset
     * as a side effect, since a successful probe proves the route is healthy again.
     *
     * @return the raw test result map from {@link LlmRoutingService#testRoute}
     */
    @PostMapping("/test/auth")
    @ManagerAuth
    public R test(@RequestBody LlmRouteConfig config) {
        if (config == null) {
            return R.error("参数不能为空");
        }
        if (isBlank(config.getBaseUrl()) || isBlank(config.getApiKey()) || isBlank(config.getModel())) {
            return R.error("测试失败:必须填写 baseUrl/apiKey/model");
        }
        Map<String, Object> data = llmRoutingService.testRoute(config);
        if (Boolean.TRUE.equals(data.get("ok")) && config.getId() != null) {
            LlmRouteConfig db = llmRouteConfigService.selectById(config.getId());
            if (db != null) {
                db.setCooldownUntil(null);
                db.setConsecutiveFailCount(0);
                db.setUpdateTime(new Date());
                llmRouteConfigService.updateById(db);
                llmRoutingService.evictCache();
            }
        }
        return R.ok(data);
    }

    /** Returns true when the string is null, empty, or whitespace-only. */
    private boolean isBlank(String s) {
        return s == null || s.trim().isEmpty();
    }
}
src/main/java/com/zy/ai/controller/WcsDiagnosisController.java
@@ -33,7 +33,7 @@ WcsDiagnosisRequest request = aiUtils.makeAiRequest(1000, "对当前系统进行巡检,如果有异常情况就进行详细的分析,如果没有异常情况则当成一次检查\n\n"); wcsDiagnosisService.diagnoseStream(request, emitter); } catch (Exception e) { emitter.completeWithError(e); try { emitter.complete(); } catch (Exception ignore) {} } }).start(); @@ -50,7 +50,7 @@ WcsDiagnosisRequest request = aiUtils.makeAiRequest(100, null); wcsDiagnosisService.askStream(request, prompt, chatId, reset, emitter); } catch (Exception e) { emitter.completeWithError(e); try { emitter.complete(); } catch (Exception ignore) {} } }).start(); return emitter; src/main/java/com/zy/ai/entity/LlmRouteConfig.java
New file @@ -0,0 +1,249 @@
package com.zy.ai.entity;

import com.baomidou.mybatisplus.annotations.TableField;
import com.baomidou.mybatisplus.annotations.TableId;
import com.baomidou.mybatisplus.annotations.TableName;
import com.baomidou.mybatisplus.enums.IdType;

import java.io.Serializable;
import java.util.Date;

/**
 * Persistent configuration of one LLM route (endpoint + credentials + failover policy),
 * mapped to table {@code sys_llm_route} via MyBatis-Plus.
 *
 * <p>Besides the static configuration (baseUrl/apiKey/model, priority, failover switches),
 * the row also carries runtime health statistics (success/failure counters, cooldown window,
 * last error/usage timestamps) that the routing service updates as routes are used.
 */
@TableName("sys_llm_route")
public class LlmRouteConfig implements Serializable {

    private static final long serialVersionUID = 1L;

    // Auto-increment primary key.
    @TableId(value = "id", type = IdType.AUTO)
    private Long id;

    // Human-readable route name.
    private String name;

    // Base URL of the LLM endpoint.
    @TableField("base_url")
    private String baseUrl;

    // Bearer token used to authenticate against the endpoint.
    @TableField("api_key")
    private String apiKey;

    // Model identifier sent in chat-completion requests.
    private String model;

    /**
     * 1 = enable deep-thinking mode, 0 = disabled.
     */
    private Short thinking;

    /**
     * Route priority; a smaller number means a higher priority.
     */
    private Integer priority;

    /**
     * 1 = enabled, 0 = disabled.
     */
    private Short status;

    // 1 = allow switching to the next route when quota is exhausted.
    @TableField("switch_on_quota")
    private Short switchOnQuota;

    // 1 = allow switching to the next route on generic errors.
    @TableField("switch_on_error")
    private Short switchOnError;

    // Cooldown duration (seconds) applied after a failure that triggers cooldown.
    @TableField("cooldown_seconds")
    private Integer cooldownSeconds;

    // Route is skipped until this instant; null means not cooling down.
    @TableField("cooldown_until")
    private Date cooldownUntil;

    // Total failure count (runtime statistic).
    @TableField("fail_count")
    private Integer failCount;

    // Total success count (runtime statistic).
    @TableField("success_count")
    private Integer successCount;

    // Consecutive failures since the last success (runtime statistic).
    @TableField("consecutive_fail_count")
    private Integer consecutiveFailCount;

    // Truncated text of the most recent error.
    @TableField("last_error")
    private String lastError;

    // Timestamp of the last successful use.
    @TableField("last_used_time")
    private Date lastUsedTime;

    // Timestamp of the last failure.
    @TableField("last_fail_time")
    private Date lastFailTime;

    @TableField("create_time")
    private Date createTime;

    @TableField("update_time")
    private Date updateTime;

    // Free-form remark.
    private String memo;

    public Long getId() { return id; }
    public void setId(Long id) { this.id = id; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public String getBaseUrl() { return baseUrl; }
    public void setBaseUrl(String baseUrl) { this.baseUrl = baseUrl; }

    public String getApiKey() { return apiKey; }
    public void setApiKey(String apiKey) { this.apiKey = apiKey; }

    public String getModel() { return model; }
    public void setModel(String model) { this.model = model; }

    public Short getThinking() { return thinking; }
    public void setThinking(Short thinking) { this.thinking = thinking; }

    public Integer getPriority() { return priority; }
    public void setPriority(Integer priority) { this.priority = priority; }

    public Short getStatus() { return status; }
    public void setStatus(Short status) { this.status = status; }

    public Short getSwitchOnQuota() { return switchOnQuota; }
    public void setSwitchOnQuota(Short switchOnQuota) { this.switchOnQuota = switchOnQuota; }

    public Short getSwitchOnError() { return switchOnError; }
    public void setSwitchOnError(Short switchOnError) { this.switchOnError = switchOnError; }

    public Integer getCooldownSeconds() { return cooldownSeconds; }
    public void setCooldownSeconds(Integer cooldownSeconds) { this.cooldownSeconds = cooldownSeconds; }

    public Date getCooldownUntil() { return cooldownUntil; }
    public void setCooldownUntil(Date cooldownUntil) { this.cooldownUntil = cooldownUntil; }

    public Integer getFailCount() { return failCount; }
    public void setFailCount(Integer failCount) { this.failCount = failCount; }

    public Integer getSuccessCount() { return successCount; }
    public void setSuccessCount(Integer successCount) { this.successCount = successCount; }

    public Integer getConsecutiveFailCount() { return consecutiveFailCount; }
    public void setConsecutiveFailCount(Integer consecutiveFailCount) { this.consecutiveFailCount = consecutiveFailCount; }

    public String getLastError() { return lastError; }
    public void setLastError(String lastError) { this.lastError = lastError; }

    public Date getLastUsedTime() { return lastUsedTime; }
    public void setLastUsedTime(Date lastUsedTime) { this.lastUsedTime = lastUsedTime; }

    public Date getLastFailTime() { return lastFailTime; }
    public void setLastFailTime(Date lastFailTime) { this.lastFailTime = lastFailTime; }

    public Date getCreateTime() { return createTime; }
    public void setCreateTime(Date createTime) { this.createTime = createTime; }

    public Date getUpdateTime() { return updateTime; }
    public void setUpdateTime(Date updateTime) { this.updateTime = updateTime; }

    public String getMemo() { return memo; }
    public void setMemo(String memo) { this.memo = memo; }
}
src/main/java/com/zy/ai/mapper/LlmRouteConfigMapper.java
New file @@ -0,0 +1,11 @@ package com.zy.ai.mapper; import com.baomidou.mybatisplus.mapper.BaseMapper; import com.zy.ai.entity.LlmRouteConfig; import org.apache.ibatis.annotations.Mapper; import org.springframework.stereotype.Repository; @Mapper @Repository public interface LlmRouteConfigMapper extends BaseMapper<LlmRouteConfig> { } src/main/java/com/zy/ai/service/LlmChatService.java
@@ -1,7 +1,11 @@ package com.zy.ai.service; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.zy.ai.entity.ChatCompletionRequest; import com.zy.ai.entity.ChatCompletionResponse; import com.zy.ai.entity.LlmRouteConfig; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; @@ -9,37 +13,33 @@ import org.springframework.http.MediaType; import org.springframework.stereotype.Service; import org.springframework.web.reactive.function.client.WebClient; import reactor.core.publisher.Mono; import reactor.core.publisher.Flux; import java.util.HashMap; import java.util.ArrayList; import java.util.List; import java.util.function.Consumer; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import java.util.function.Consumer; @Slf4j @Service @RequiredArgsConstructor public class LlmChatService { private final WebClient llmWebClient; private final LlmRoutingService llmRoutingService; @Value("${llm.api-key}") private String apiKey; @Value("${llm.base-url:}") private String fallbackBaseUrl; @Value("${llm.model}") private String model; @Value("${llm.api-key:}") private String fallbackApiKey; @Value("${llm.pythonPlatformUrl}") private String pythonPlatformUrl; @Value("${llm.model:}") private String fallbackModel; @Value("${llm.thinking}") private String thinking; @Value("${llm.thinking:false}") private String fallbackThinking; /** * 通用对话方法:传入 messages,返回大模型文本回复 @@ -49,27 +49,12 @@ Integer maxTokens) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? 
maxTokens : 1024); req.setStream(false); ChatCompletionResponse response = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToMono(resp -> resp.bodyToFlux(String.class) .collectList() .map(list -> { String payload = String.join("\n\n", list); return parseCompletion(payload); })) .doOnError(ex -> log.error("调用 LLM 失败", ex)) .onErrorResume(ex -> Mono.empty()) .block(); ChatCompletionResponse response = complete(req); if (response == null || response.getChoices() == null || @@ -88,17 +73,10 @@ Integer maxTokens, List<Object> tools) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? maxTokens : 1024); req.setStream(false); if(thinking.equals("enable")) { ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking(); thinking.setType("enable"); req.setThinking(thinking); } if (tools != null && !tools.isEmpty()) { req.setTools(tools); req.setTool_choice("auto"); @@ -107,26 +85,42 @@ } public ChatCompletionResponse complete(ChatCompletionRequest req) { try { return llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToMono(resp -> resp.bodyToFlux(String.class) .collectList() .map(list -> { String payload = String.join("\n\n", list); return parseCompletion(payload); })) .doOnError(ex -> log.error("调用 LLM 失败", ex)) .onErrorResume(ex -> Mono.empty()) .block(); } catch (Exception e) { log.error("调用 LLM 失败", e); List<ResolvedRoute> routes = resolveRoutes(); if (routes.isEmpty()) { log.error("调用 LLM 失败: 未配置可用 LLM 路由"); return null; } Throwable last = 
null; for (int i = 0; i < routes.size(); i++) { ResolvedRoute route = routes.get(i); boolean hasNext = i < routes.size() - 1; try { ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false); ChatCompletionResponse resp = callCompletion(route, routeReq); if (!isValidCompletion(resp)) { throw new RuntimeException("LLM 响应为空"); } markSuccess(route); return resp; } catch (Throwable ex) { last = ex; boolean quota = isQuotaExhausted(ex); boolean canSwitch = shouldSwitch(route, quota); markFailure(route, ex, canSwitch); if (hasNext && canSwitch) { log.warn("LLM 切换到下一路由, current={}, reason={}", route.tag(), errorText(ex)); continue; } log.error("调用 LLM 失败, route={}", route.tag(), ex); break; } } if (last != null) { log.error("调用 LLM 全部路由失败: {}", errorText(last)); } return null; } public void chatStream(List<ChatCompletionRequest.Message> messages, @@ -137,92 +131,12 @@ Consumer<Throwable> onError) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? 
maxTokens : 1024); req.setStream(true); Flux<String> flux = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .retrieve() .bodyToFlux(String.class) .doOnError(ex -> log.error("调用 LLM 流式失败", ex)); AtomicBoolean doneSeen = new AtomicBoolean(false); AtomicBoolean errorSeen = new AtomicBoolean(false); LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>(); Thread drain = new Thread(() -> { try { while (true) { String s = queue.poll(2, TimeUnit.SECONDS); if (s != null) { try { onChunk.accept(s); } catch (Exception ignore) {} } if (doneSeen.get() && queue.isEmpty()) { if (!errorSeen.get()) { try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {} } break; } } } catch (InterruptedException ignore) { ignore.printStackTrace(); } }); drain.setDaemon(true); drain.start(); flux.subscribe(payload -> { if (payload == null || payload.isEmpty()) return; String[] events = payload.split("\\r?\\n\\r?\\n"); for (String part : events) { String s = part; if (s == null || s.isEmpty()) continue; if (s.startsWith("data:")) { s = s.substring(5); if (s.startsWith(" ")) s = s.substring(1); } if ("[DONE]".equals(s.trim())) { doneSeen.set(true); continue; } try { JSONObject obj = JSON.parseObject(s); JSONArray choices = obj.getJSONArray("choices"); if (choices != null && !choices.isEmpty()) { JSONObject c0 = choices.getJSONObject(0); JSONObject delta = c0.getJSONObject("delta"); if (delta != null) { String content = delta.getString("content"); if (content != null) { try { queue.offer(content); } catch (Exception ignore) {} } } } } catch (Exception e) { e.printStackTrace(); } } }, err -> { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(err); }, () -> { if (!doneSeen.get()) { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成")); } else 
{ doneSeen.set(true); } }); streamWithFailover(req, onChunk, onComplete, onError); } public void chatStreamWithTools(List<ChatCompletionRequest.Message> messages, @@ -233,120 +147,46 @@ Runnable onComplete, Consumer<Throwable> onError) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? maxTokens : 1024); req.setStream(true); if(thinking.equals("enable")) { ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking(); thinking.setType("enable"); req.setThinking(thinking); } if (tools != null && !tools.isEmpty()) { req.setTools(tools); req.setTool_choice("auto"); } Flux<String> flux = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .retrieve() .bodyToFlux(String.class) .doOnError(ex -> log.error("调用 LLM 流式失败", ex)); AtomicBoolean doneSeen = new AtomicBoolean(false); AtomicBoolean errorSeen = new AtomicBoolean(false); LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>(); Thread drain = new Thread(() -> { try { while (true) { String s = queue.poll(5, TimeUnit.SECONDS); if (s != null) { try { onChunk.accept(s); } catch (Exception ignore) {} } if (doneSeen.get() && queue.isEmpty()) { if (!errorSeen.get()) { try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {} } break; } } } catch (InterruptedException ignore) { ignore.printStackTrace(); } }); drain.setDaemon(true); drain.start(); flux.subscribe(payload -> { if (payload == null || payload.isEmpty()) return; String[] events = payload.split("\\r?\\n\\r?\\n"); for (String part : events) { String s = part; if (s == null || s.isEmpty()) continue; if (s.startsWith("data:")) { s = s.substring(5); if (s.startsWith(" ")) s = s.substring(1); } if ("[DONE]".equals(s.trim())) { 
doneSeen.set(true); continue; } try { JSONObject obj = JSON.parseObject(s); JSONArray choices = obj.getJSONArray("choices"); if (choices != null && !choices.isEmpty()) { JSONObject c0 = choices.getJSONObject(0); JSONObject delta = c0.getJSONObject("delta"); if (delta != null) { String content = delta.getString("content"); if (content != null) { try { queue.offer(content); } catch (Exception ignore) {} } } } } catch (Exception e) { e.printStackTrace(); } } }, err -> { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(err); }, () -> { if (!doneSeen.get()) { errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成")); } else { doneSeen.set(true); } }); streamWithFailover(req, onChunk, onComplete, onError); } public void chatStreamRunPython(String prompt, String chatId, Consumer<String> onChunk, private void streamWithFailover(ChatCompletionRequest req, Consumer<String> onChunk, Runnable onComplete, Consumer<Throwable> onError) { HashMap<String, Object> req = new HashMap<>(); req.put("prompt", prompt); req.put("chatId", chatId); List<ResolvedRoute> routes = resolveRoutes(); if (routes.isEmpty()) { if (onError != null) onError.accept(new RuntimeException("未配置可用 LLM 路由")); return; } attemptStream(routes, 0, req, onChunk, onComplete, onError); } Flux<String> flux = llmWebClient.post() .uri(pythonPlatformUrl) .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .retrieve() .bodyToFlux(String.class) .doOnError(ex -> log.error("调用 LLM 流式失败", ex)); private void attemptStream(List<ResolvedRoute> routes, int index, ChatCompletionRequest req, Consumer<String> onChunk, Runnable onComplete, Consumer<Throwable> onError) { if (index >= routes.size()) { if (onError != null) onError.accept(new RuntimeException("LLM 路由全部失败")); return; } ResolvedRoute route = routes.get(index); ChatCompletionRequest routeReq = 
applyRoute(cloneRequest(req), route, true); AtomicBoolean doneSeen = new AtomicBoolean(false); AtomicBoolean errorSeen = new AtomicBoolean(false); AtomicBoolean emitted = new AtomicBoolean(false); LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>(); Thread drain = new Thread(() -> { @@ -354,6 +194,7 @@ while (true) { String s = queue.poll(2, TimeUnit.SECONDS); if (s != null) { emitted.set(true); try { onChunk.accept(s); } catch (Exception ignore) { @@ -370,13 +211,12 @@ } } } catch (InterruptedException ignore) { ignore.printStackTrace(); } }); drain.setDaemon(true); drain.start(); flux.subscribe(payload -> { streamFlux(route, routeReq).subscribe(payload -> { if (payload == null || payload.isEmpty()) return; String[] events = payload.split("\\r?\\n\\r?\\n"); for (String part : events) { @@ -390,10 +230,6 @@ doneSeen.set(true); continue; } if("<think>".equals(s.trim()) || "</think>".equals(s.trim())) { queue.offer(s.trim()); continue; } try { JSONObject obj = JSON.parseObject(s); JSONArray choices = obj.getJSONArray("choices"); @@ -403,30 +239,190 @@ if (delta != null) { String content = delta.getString("content"); if (content != null) { try { queue.offer(content); } catch (Exception ignore) { } queue.offer(content); } } } } catch (Exception e) { e.printStackTrace(); log.warn("解析 LLM stream 片段失败: {}", e.getMessage()); } } }, err -> { errorSeen.set(true); doneSeen.set(true); boolean quota = isQuotaExhausted(err); boolean canSwitch = shouldSwitch(route, quota); markFailure(route, err, canSwitch); if (!emitted.get() && canSwitch && index < routes.size() - 1) { log.warn("LLM 路由失败,自动切换,current={}, reason={}", route.tag(), errorText(err)); attemptStream(routes, index + 1, req, onChunk, onComplete, onError); return; } if (onError != null) onError.accept(err); }, () -> { if (!doneSeen.get()) { RuntimeException ex = new RuntimeException("LLM 流意外完成"); errorSeen.set(true); doneSeen.set(true); if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成")); 
boolean canSwitch = shouldSwitch(route, false); markFailure(route, ex, canSwitch); if (!emitted.get() && canSwitch && index < routes.size() - 1) { log.warn("LLM 路由流异常完成,自动切换,current={}", route.tag()); attemptStream(routes, index + 1, req, onChunk, onComplete, onError); } else { if (onError != null) onError.accept(ex); } } else { markSuccess(route); doneSeen.set(true); } }); } private Flux<String> streamFlux(ResolvedRoute route, ChatCompletionRequest req) { WebClient client = WebClient.builder().baseUrl(route.baseUrl).build(); return client.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToFlux(resp -> { int status = resp.rawStatusCode(); if (status >= 200 && status < 300) { return resp.bodyToFlux(String.class); } return resp.bodyToMono(String.class) .defaultIfEmpty("") .flatMapMany(body -> Flux.error(new LlmRouteException(status, body))); }) .doOnError(ex -> log.error("调用 LLM 流式失败, route={}", route.tag(), ex)); } private ChatCompletionResponse callCompletion(ResolvedRoute route, ChatCompletionRequest req) { WebClient client = WebClient.builder().baseUrl(route.baseUrl).build(); RawCompletionResult raw = client.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .exchangeToMono(resp -> resp.bodyToFlux(String.class) .collectList() .map(list -> new RawCompletionResult(resp.rawStatusCode(), String.join("\\n\\n", list)))) .block(); if (raw == null) { throw new RuntimeException("LLM 返回为空"); } if (raw.statusCode < 200 || raw.statusCode >= 300) { throw new LlmRouteException(raw.statusCode, raw.payload); } return parseCompletion(raw.payload); } private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) { 
req.setModel(route.model); req.setStream(stream); if (route.thinkingEnabled) { ChatCompletionRequest.Thinking t = new ChatCompletionRequest.Thinking(); t.setType("enable"); req.setThinking(t); } else { req.setThinking(null); } return req; } private ChatCompletionRequest cloneRequest(ChatCompletionRequest src) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(src.getModel()); req.setMessages(src.getMessages()); req.setTemperature(src.getTemperature()); req.setMax_tokens(src.getMax_tokens()); req.setStream(src.getStream()); req.setTools(src.getTools()); req.setTool_choice(src.getTool_choice()); req.setThinking(src.getThinking()); return req; } private boolean isValidCompletion(ChatCompletionResponse response) { if (response == null || response.getChoices() == null || response.getChoices().isEmpty()) { return false; } ChatCompletionRequest.Message message = response.getChoices().get(0).getMessage(); if (message == null) { return false; } if (!isBlank(message.getContent())) { return true; } return message.getTool_calls() != null && !message.getTool_calls().isEmpty(); } private boolean shouldSwitch(ResolvedRoute route, boolean quota) { return quota ? route.switchOnQuota : route.switchOnError; } private void markSuccess(ResolvedRoute route) { if (route.id != null) { llmRoutingService.markSuccess(route.id); } } private void markFailure(ResolvedRoute route, Throwable ex, boolean enterCooldown) { if (route.id != null) { llmRoutingService.markFailure(route.id, errorText(ex), enterCooldown, route.cooldownSeconds); } } private String errorText(Throwable ex) { if (ex == null) return "unknown"; if (ex instanceof LlmRouteException) { LlmRouteException e = (LlmRouteException) ex; String body = e.body == null ? "" : e.body; if (body.length() > 240) { body = body.substring(0, 240); } return "status=" + e.statusCode + ", body=" + body; } return ex.getMessage() == null ? 
ex.toString() : ex.getMessage(); } private boolean isQuotaExhausted(Throwable ex) { if (!(ex instanceof LlmRouteException)) return false; LlmRouteException e = (LlmRouteException) ex; if (e.statusCode == 429) return true; String text = (e.body == null ? "" : e.body).toLowerCase(); return text.contains("insufficient_quota") || text.contains("quota") || text.contains("余额") || text.contains("用量") || text.contains("超限") || text.contains("rate limit"); } private List<ResolvedRoute> resolveRoutes() { List<ResolvedRoute> routes = new ArrayList<>(); List<LlmRouteConfig> dbRoutes = llmRoutingService.listAvailableRoutes(); for (LlmRouteConfig c : dbRoutes) { routes.add(ResolvedRoute.fromDb(c)); } // 兼容:数据库为空时,回退到 yml if (routes.isEmpty() && !isBlank(fallbackBaseUrl) && !isBlank(fallbackApiKey) && !isBlank(fallbackModel)) { routes.add(ResolvedRoute.fromFallback(fallbackBaseUrl, fallbackApiKey, fallbackModel, isFallbackThinkingEnabled())); } return routes; } private boolean isFallbackThinkingEnabled() { String x = fallbackThinking == null ? 
"" : fallbackThinking.trim().toLowerCase(); return "true".equals(x) || "1".equals(x) || "enable".equals(x); } private boolean isBlank(String s) { return s == null || s.trim().isEmpty(); } private ChatCompletionResponse mergeSseChunk(ChatCompletionResponse acc, String payload) { @@ -452,7 +448,7 @@ ChatCompletionResponse.Choice choice = new ChatCompletionResponse.Choice(); ChatCompletionRequest.Message msg = new ChatCompletionRequest.Message(); choice.setMessage(msg); java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>(); ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>(); list.add(choice); acc.setChoices(list); } @@ -490,7 +486,8 @@ if (created != null) acc.setCreated(created); String object = obj.getString("object"); if (object != null && !object.isEmpty()) acc.setObjectName(object); } catch (Exception ignore) {} } catch (Exception ignore) { } } return acc; } @@ -502,7 +499,8 @@ if (r != null && r.getChoices() != null && !r.getChoices().isEmpty() && r.getChoices().get(0).getMessage() != null) { return r; } } catch (Exception ignore) {} } catch (Exception ignore) { } ChatCompletionResponse sse = mergeSseChunk(new ChatCompletionResponse(), payload); if (sse.getChoices() != null && !sse.getChoices().isEmpty() && sse.getChoices().get(0).getMessage() != null && sse.getChoices().get(0).getMessage().getContent() != null) { return sse; @@ -513,9 +511,75 @@ msg.setRole("assistant"); msg.setContent(payload); choice.setMessage(msg); java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>(); ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>(); list.add(choice); r.setChoices(list); return r; } private static class RawCompletionResult { private final int statusCode; private final String payload; private RawCompletionResult(int statusCode, String payload) { this.statusCode = statusCode; this.payload = payload; } } private static class LlmRouteException extends RuntimeException { private 
final int statusCode; private final String body; private LlmRouteException(int statusCode, String body) { super("http status=" + statusCode); this.statusCode = statusCode; this.body = body; } } private static class ResolvedRoute { private Long id; private String name; private String baseUrl; private String apiKey; private String model; private boolean thinkingEnabled; private boolean switchOnQuota; private boolean switchOnError; private Integer cooldownSeconds; private static ResolvedRoute fromDb(LlmRouteConfig c) { ResolvedRoute r = new ResolvedRoute(); r.id = c.getId(); r.name = c.getName(); r.baseUrl = c.getBaseUrl(); r.apiKey = c.getApiKey(); r.model = c.getModel(); r.thinkingEnabled = c.getThinking() != null && c.getThinking() == 1; r.switchOnQuota = c.getSwitchOnQuota() == null || c.getSwitchOnQuota() == 1; r.switchOnError = c.getSwitchOnError() == null || c.getSwitchOnError() == 1; r.cooldownSeconds = c.getCooldownSeconds(); return r; } private static ResolvedRoute fromFallback(String baseUrl, String apiKey, String model, boolean thinkingEnabled) { ResolvedRoute r = new ResolvedRoute(); r.name = "fallback-yml"; r.baseUrl = baseUrl; r.apiKey = apiKey; r.model = model; r.thinkingEnabled = thinkingEnabled; r.switchOnQuota = true; r.switchOnError = true; r.cooldownSeconds = 300; return r; } private String tag() { String showName = name == null ? "unnamed" : name; String showModel = model == null ? "" : (" model=" + model); return showName + showModel; } } } src/main/java/com/zy/ai/service/LlmRouteConfigService.java
New file @@ -0,0 +1,7 @@ package com.zy.ai.service; import com.baomidou.mybatisplus.service.IService; import com.zy.ai.entity.LlmRouteConfig; public interface LlmRouteConfigService extends IService<LlmRouteConfig> { } src/main/java/com/zy/ai/service/LlmRoutingService.java
New file @@ -0,0 +1,270 @@
package com.zy.ai.service;

import com.baomidou.mybatisplus.mapper.EntityWrapper;
import com.zy.ai.entity.LlmRouteConfig;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Mono;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Routing layer for LLM API endpoints stored in the sys_llm_route table.
 *
 * <p>Responsibilities visible in this class:
 * <ul>
 *   <li>Serve the route list through a short-lived (3s) in-memory cache.</li>
 *   <li>Filter routes by enabled/valid/cooling state for the caller to pick from.</li>
 *   <li>Record per-route success/failure statistics and cooldown windows.</li>
 *   <li>Normalize defaults on create/update and probe a route with a tiny
 *       "ping" chat completion request.</li>
 * </ul>
 *
 * <p>NOTE(review): {@link #markSuccess(Long)} evicts the cache on every successful
 * call, which effectively disables the 3-second cache under steady traffic —
 * confirm whether eviction on success is intentional.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class LlmRoutingService {

    // Cache lifetime for the full route list; keeps DB load low while letting
    // admin edits show up within a few seconds.
    private static final long CACHE_TTL_MS = 3000L;

    private final LlmRouteConfigService llmRouteConfigService;

    // Route-list cache; both fields are volatile so readers outside the lock in
    // loadAllRoutes() see a consistent publish (double-checked locking).
    private volatile List<LlmRouteConfig> allRouteCache = Collections.emptyList();
    private volatile long cacheExpireAt = 0L;

    /** Force the next {@link #loadAllRoutes()} call to reload from the database. */
    public void evictCache() {
        cacheExpireAt = 0L;
    }

    /**
     * Returns ALL routes (including disabled/cooling ones) ordered by priority then id.
     * A defensive copy is returned so callers cannot mutate the cache.
     */
    public List<LlmRouteConfig> listAllOrdered() {
        return new ArrayList<>(loadAllRoutes());
    }

    /**
     * Returns routes eligible for use right now: enabled, with baseUrl/apiKey/model
     * present, and not inside their cooldown window.
     *
     * <p>Degraded fallback: if every otherwise-valid route is cooling down, the
     * cooling routes are returned anyway so the system never becomes completely
     * unavailable just because of cooldowns.
     */
    public List<LlmRouteConfig> listAvailableRoutes() {
        Date now = new Date();
        List<LlmRouteConfig> result = new ArrayList<>();
        List<LlmRouteConfig> coolingRoutes = new ArrayList<>();
        // Counters only feed the diagnostic warn-log below.
        int total = 0;
        int disabled = 0;
        int invalid = 0;
        for (LlmRouteConfig c : loadAllRoutes()) {
            total++;
            if (!isEnabled(c)) { disabled++; continue; }
            if (isBlank(c.getBaseUrl()) || isBlank(c.getApiKey()) || isBlank(c.getModel())) { invalid++; continue; }
            if (isCooling(c, now)) { coolingRoutes.add(c); continue; }
            result.add(c);
        }
        if (result.isEmpty() && !coolingRoutes.isEmpty()) {
            // Degrade gracefully: when all routes are cooling, re-enable them rather
            // than leaving the system with zero usable routes.
            log.warn("LLM 路由均处于冷却,降级启用冷却路由。cooling={}, total={}", coolingRoutes.size(), total);
            return coolingRoutes;
        }
        if (result.isEmpty()) {
            log.warn("未找到可用 LLM 路由。total={}, disabled={}, invalid={}", total, disabled, invalid);
        }
        return result;
    }

    /**
     * Records a successful call on the given route: bumps successCount, resets the
     * consecutive-failure counter and stamps lastUsedTime.
     * Best-effort: DB errors are logged and swallowed so stats upkeep never breaks
     * the calling request.
     */
    public void markSuccess(Long routeId) {
        if (routeId == null) return;
        try {
            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
            if (db == null) return;
            db.setSuccessCount(nvl(db.getSuccessCount()) + 1);
            db.setConsecutiveFailCount(0);
            db.setLastUsedTime(new Date());
            db.setUpdateTime(new Date());
            llmRouteConfigService.updateById(db);
            evictCache();
        } catch (Exception e) {
            log.warn("更新路由成功状态失败, routeId={}", routeId, e);
        }
    }

    /**
     * Records a failed call on the given route and optionally starts a cooldown.
     *
     * @param routeId         route primary key; no-op when null
     * @param errorText       error summary, trimmed to 500 chars before storage
     * @param enterCooldown   when true, sets cooldownUntil = now + effective seconds
     * @param cooldownSeconds explicit cooldown override; falls back to the route's
     *                        own cooldownSeconds (or 300s) when null/non-positive
     */
    public void markFailure(Long routeId, String errorText, boolean enterCooldown, Integer cooldownSeconds) {
        if (routeId == null) return;
        try {
            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
            if (db == null) return;
            Date now = new Date();
            db.setFailCount(nvl(db.getFailCount()) + 1);
            db.setConsecutiveFailCount(nvl(db.getConsecutiveFailCount()) + 1);
            db.setLastFailTime(now);
            db.setLastError(trimError(errorText));
            if (enterCooldown) {
                int sec = cooldownSeconds != null && cooldownSeconds > 0 ? cooldownSeconds : defaultCooldown(db.getCooldownSeconds());
                db.setCooldownUntil(new Date(now.getTime() + sec * 1000L));
            }
            db.setUpdateTime(now);
            llmRouteConfigService.updateById(db);
            evictCache();
        } catch (Exception e) {
            log.warn("更新路由失败状态失败, routeId={}", routeId, e);
        }
    }

    // 300s fallback when the route has no (positive) configured cooldown.
    private int defaultCooldown(Integer sec) {
        return sec == null || sec <= 0 ? 300 : sec;
    }

    // Flattens newlines and caps the stored error summary at 500 chars
    // (matches the last_error VARCHAR(512) column).
    private String trimError(String err) {
        if (err == null) return null;
        String x = err.replace("\n", " ").replace("\r", " ");
        return x.length() > 500 ? x.substring(0, 500) : x;
    }

    // null-safe Integer → 0.
    private Integer nvl(Integer x) {
        return x == null ? 0 : x;
    }

    // Enabled means status == 1 (column comment: 1启用0禁用).
    private boolean isEnabled(LlmRouteConfig c) {
        return c != null && c.getStatus() != null && c.getStatus() == 1;
    }

    // A route is cooling while cooldownUntil lies in the future.
    private boolean isCooling(LlmRouteConfig c, Date now) {
        return c != null && c.getCooldownUntil() != null && c.getCooldownUntil().after(now);
    }

    /**
     * Loads all routes ordered by priority then id, through the TTL cache.
     * Classic double-checked locking: fast volatile read first, then re-check
     * under the lock before hitting the database.
     */
    private List<LlmRouteConfig> loadAllRoutes() {
        long now = System.currentTimeMillis();
        if (now < cacheExpireAt && allRouteCache != null) {
            return allRouteCache;
        }
        synchronized (this) {
            now = System.currentTimeMillis();
            if (now < cacheExpireAt && allRouteCache != null) {
                return allRouteCache;
            }
            EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
            wrapper.orderBy("priority", true).orderBy("id", true);
            List<LlmRouteConfig> list = llmRouteConfigService.selectList(wrapper);
            allRouteCache = list == null ? Collections.emptyList() : list;
            cacheExpireAt = System.currentTimeMillis() + CACHE_TTL_MS;
            return allRouteCache;
        }
    }

    // null-safe trim; returns "" for null.
    private String safe(String s) {
        return s == null ? "" : s.trim();
    }

    private boolean isBlank(String s) {
        return s == null || s.trim().isEmpty();
    }

    /**
     * Fills defaults on a route config before insert/update: auto-name, thinking=0,
     * priority=100, status=1, both switch flags on, cooldown 300s, zeroed counters,
     * and create/update timestamps.
     *
     * @param cfg      config to normalize in place (also returned for chaining)
     * @param isCreate when true, createTime is stamped as well
     */
    public LlmRouteConfig fillAndNormalize(LlmRouteConfig cfg, boolean isCreate) {
        Date now = new Date();
        if (isBlank(cfg.getName())) { cfg.setName("LLM_ROUTE_" + now.getTime()); }
        // NOTE(review): (short) casts suggest these entity fields are Short — confirm
        // against LlmRouteConfig, which is not visible here.
        if (cfg.getThinking() == null) { cfg.setThinking((short) 0); }
        if (cfg.getPriority() == null) { cfg.setPriority(100); }
        if (cfg.getStatus() == null) { cfg.setStatus((short) 1); }
        if (cfg.getSwitchOnQuota() == null) { cfg.setSwitchOnQuota((short) 1); }
        if (cfg.getSwitchOnError() == null) { cfg.setSwitchOnError((short) 1); }
        if (cfg.getCooldownSeconds() == null || cfg.getCooldownSeconds() < 0) { cfg.setCooldownSeconds(300); }
        if (cfg.getFailCount() == null) { cfg.setFailCount(0); }
        if (cfg.getSuccessCount() == null) { cfg.setSuccessCount(0); }
        if (cfg.getConsecutiveFailCount() == null) { cfg.setConsecutiveFailCount(0); }
        if (isCreate) { cfg.setCreateTime(now); }
        cfg.setUpdateTime(now);
        return cfg;
    }

    /**
     * Probes a route with a minimal chat request and returns a UI-friendly map:
     * ok (2xx), statusCode (-1 on transport error/exception), latencyMs, message,
     * responseSnippet (body trimmed to 300 chars). Never throws.
     */
    public Map<String, Object> testRoute(LlmRouteConfig cfg) {
        HashMap<String, Object> result = new HashMap<>();
        long start = System.currentTimeMillis();
        try {
            TestHttpResult raw = testJavaRoute(cfg);
            fillTestResult(result, raw, start);
        } catch (Exception e) {
            result.put("ok", false);
            result.put("statusCode", -1);
            result.put("latencyMs", System.currentTimeMillis() - start);
            result.put("message", "测试异常: " + safe(e.getMessage()));
            result.put("responseSnippet", "");
        }
        return result;
    }

    // Maps a raw HTTP result into the testRoute() response map; ok == 2xx status.
    private void fillTestResult(HashMap<String, Object> result, TestHttpResult raw, long start) {
        boolean ok = raw.statusCode >= 200 && raw.statusCode < 300;
        result.put("ok", ok);
        result.put("statusCode", raw.statusCode);
        result.put("latencyMs", System.currentTimeMillis() - start);
        result.put("message", ok ? "测试成功" : "测试失败");
        result.put("responseSnippet", trimBody(raw.body));
    }

    /**
     * Sends one blocking OpenAI-style POST {baseUrl}/chat/completions "ping"
     * (stream=false, max_tokens=8, temperature=0) with a 12s timeout.
     * Transport errors are converted to status -1 with the exception message as body.
     *
     * <p>NOTE(review): baseUrl is used verbatim — a trailing '/' would produce
     * "//chat/completions"; consider normalizing upstream.
     */
    private TestHttpResult testJavaRoute(LlmRouteConfig cfg) {
        HashMap<String, Object> req = new HashMap<>();
        req.put("model", cfg.getModel());
        List<Map<String, String>> messages = new ArrayList<>();
        HashMap<String, String> msg = new HashMap<>();
        msg.put("role", "user");
        msg.put("content", "ping");
        messages.add(msg);
        req.put("messages", messages);
        req.put("stream", false);
        req.put("max_tokens", 8);
        req.put("temperature", 0);
        WebClient client = WebClient.builder().baseUrl(cfg.getBaseUrl()).build();
        return client.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + cfg.getApiKey())
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .exchangeToMono(resp -> resp.bodyToMono(String.class)
                        .defaultIfEmpty("")
                        .map(body -> new TestHttpResult(resp.rawStatusCode(), body)))
                .timeout(Duration.ofSeconds(12))
                .onErrorResume(ex -> Mono.just(new TestHttpResult(-1, safe(ex.getMessage()))))
                .block();
    }

    // Flattens newlines and caps the snippet shown to the UI at 300 chars.
    private String trimBody(String body) {
        String x = safe(body).replace("\r", " ").replace("\n", " ");
        return x.length() > 300 ? x.substring(0, 300) : x;
    }

    /** Immutable (status, body) pair produced by the probe request. */
    private static class TestHttpResult {
        private final int statusCode;
        private final String body;

        private TestHttpResult(int statusCode, String body) {
            this.statusCode = statusCode;
            this.body = body;
        }
    }
}
src/main/java/com/zy/ai/service/PythonService.java
File was deleted src/main/java/com/zy/ai/service/WcsDiagnosisService.java
@@ -11,7 +11,6 @@ import com.zy.common.utils.RedisUtil; import com.zy.core.enums.RedisKeyType; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -28,8 +27,6 @@ private static final long CHAT_TTL_SECONDS = 7L * 24 * 3600; @Value("${llm.platform}") private String platform; @Autowired private LlmChatService llmChatService; @Autowired @@ -40,8 +37,6 @@ private AiUtils aiUtils; @Autowired(required = false) private McpController mcpController; @Autowired private PythonService pythonService; public void diagnoseStream(WcsDiagnosisRequest request, SseEmitter emitter) { List<ChatCompletionRequest.Message> messages = new ArrayList<>(); @@ -85,7 +80,7 @@ try { try { emitter.send(SseEmitter.event().data("【AI】运行已停止(异常)")); } catch (Exception ignore) {} log.error("AI diagnose stream stopped: error", e); emitter.completeWithError(e); emitter.complete(); } catch (Exception ignore) {} }); } @@ -95,11 +90,6 @@ String chatId, boolean reset, SseEmitter emitter) { if (platform.equals("python")) { pythonService.runPython(prompt, chatId, emitter); return; } List<ChatCompletionRequest.Message> messages = new ArrayList<>(); List<ChatCompletionRequest.Message> history = null; @@ -187,7 +177,10 @@ emitter.complete(); } catch (Exception ignore) {} }, e -> { try { emitter.completeWithError(e); } catch (Exception ignore) {} try { try { emitter.send(SseEmitter.event().data("【AI】运行已停止(异常)")); } catch (Exception ignore) {} emitter.complete(); } catch (Exception ignore) {} }); } @@ -380,7 +373,7 @@ try { sse(emitter, "\\n\\n【AI】运行已停止(异常)\\n\\n"); log.error("AI MCP diagnose stopped: error", e); emitter.completeWithError(e); emitter.complete(); } catch (Exception ignore) {} return true; } src/main/java/com/zy/ai/service/impl/LlmRouteConfigServiceImpl.java
New file @@ -0,0 +1,11 @@
package com.zy.ai.service.impl;

import com.baomidou.mybatisplus.service.impl.ServiceImpl;
import com.zy.ai.entity.LlmRouteConfig;
import com.zy.ai.mapper.LlmRouteConfigMapper;
import com.zy.ai.service.LlmRouteConfigService;
import org.springframework.stereotype.Service;

/**
 * MyBatis-Plus CRUD service for {@link LlmRouteConfig} (table sys_llm_route).
 * All behavior is inherited from {@link ServiceImpl}; no custom logic here.
 */
@Service("llmRouteConfigService")
public class LlmRouteConfigServiceImpl extends ServiceImpl<LlmRouteConfigMapper, LlmRouteConfig> implements LlmRouteConfigService {
}
src/main/resources/application.yml
@@ -1,6 +1,6 @@ # 系统版本信息 app: version: 1.0.4.2 version: 1.0.4.3 version-type: dev # prd 或 dev server: @@ -83,27 +83,15 @@ expireDays: 7 llm: platform: java pythonPlatformUrl: http://127.0.0.1:9000/ai/diagnose/askStream thinking: enable # base-url: https://api.siliconflow.cn/v1 # api-key: sk-sxdtebtquwrugzrmaqqqkzdzmrgzhzmplwwuowysdasccent # model: deepseek-ai/DeepSeek-V3.2 # base-url: http://47.76.147.249:9998/e/7g7kqxxt1ei2un71 # api-key: app-mP0O6aY5WpbfaHs7BNnjVkli # model: deepseek-ai/DeepSeek-V3.2 # base-url: http://34.2.134.223:3000/v1 # api-key: sk-WabrmtOezCFwVo7XvVOrO3QkmfcKG7T7jy0BaVnmQTWm5GXh # model: gemini-3-pro-preview # base-url: http://127.0.0.1:8317/v1 # api-key: WznOjAGJNVFKSe9kBZTr # model: gpt-5 base-url: https://api.xiaomimimo.com/v1 api-key: sk-cw7e4se9cal8cxdgjml8dmtn4pdmqtvfccg5fcermt0ddtys model: mimo-v2-flash # 现已迁移到数据库表 sys_llm_route 维护(支持多API/多模型/多Key自动切换) # 以下仅作为数据库为空时的兼容回退配置 thinking: false base-url: api-key: model: perf: methodTiming: enabled: false thresholdMs: 50 sampleRate: 1.0 sampleRate: 1.0 src/main/resources/mapper/LlmRouteConfigMapper.xml
New file @@ -0,0 +1,29 @@ <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd"> <mapper namespace="com.zy.ai.mapper.LlmRouteConfigMapper"> <resultMap id="BaseResultMap" type="com.zy.ai.entity.LlmRouteConfig"> <id column="id" property="id"/> <result column="name" property="name"/> <result column="base_url" property="baseUrl"/> <result column="api_key" property="apiKey"/> <result column="model" property="model"/> <result column="thinking" property="thinking"/> <result column="priority" property="priority"/> <result column="status" property="status"/> <result column="switch_on_quota" property="switchOnQuota"/> <result column="switch_on_error" property="switchOnError"/> <result column="cooldown_seconds" property="cooldownSeconds"/> <result column="cooldown_until" property="cooldownUntil"/> <result column="fail_count" property="failCount"/> <result column="success_count" property="successCount"/> <result column="consecutive_fail_count" property="consecutiveFailCount"/> <result column="last_error" property="lastError"/> <result column="last_used_time" property="lastUsedTime"/> <result column="last_fail_time" property="lastFailTime"/> <result column="create_time" property="createTime"/> <result column="update_time" property="updateTime"/> <result column="memo" property="memo"/> </resultMap> </mapper> src/main/resources/sql/20260303_add_ai_config_menu.sql
New file @@ -0,0 +1,46 @@ -- 将 AI配置 菜单挂载到:开发专用 -> AI配置 -- 说明:本系统菜单来源于 sys_resource,执行本脚本后请在“角色授权”里给对应角色勾选新菜单。 -- 1) 定位“开发专用”一级菜单 SET @dev_parent_id := ( SELECT id FROM sys_resource WHERE name = '开发专用' AND level = 1 ORDER BY id LIMIT 1 ); -- 2) 新增二级菜单:AI配置(页面) INSERT INTO sys_resource(code, name, resource_id, level, sort, status) SELECT 'ai/llm_config.html', 'AI配置', @dev_parent_id, 2, 999, 1 FROM dual WHERE @dev_parent_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM sys_resource WHERE code = 'ai/llm_config.html' AND level = 2 ); -- 3) 新增三级按钮权限:查看(用于角色细粒度授权) SET @ai_cfg_id := ( SELECT id FROM sys_resource WHERE code = 'ai/llm_config.html' AND level = 2 ORDER BY id LIMIT 1 ); INSERT INTO sys_resource(code, name, resource_id, level, sort, status) SELECT 'ai/llm_config.html#view', '查看', @ai_cfg_id, 3, 1, 1 FROM dual WHERE @ai_cfg_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM sys_resource WHERE code = 'ai/llm_config.html#view' AND level = 3 ); -- 可选检查 SELECT id, code, name, resource_id, level, sort, status FROM sys_resource WHERE code IN ('ai/llm_config.html', 'ai/llm_config.html#view'); src/main/resources/sql/20260303_create_sys_llm_route.sql
New file @@ -0,0 +1,32 @@ CREATE TABLE IF NOT EXISTS `sys_llm_route` ( `id` BIGINT NOT NULL AUTO_INCREMENT COMMENT '主键', `name` VARCHAR(64) NOT NULL COMMENT '路由名称', `base_url` VARCHAR(255) DEFAULT NULL COMMENT 'LLM API Base URL', `api_key` VARCHAR(512) DEFAULT NULL COMMENT 'API Key', `model` VARCHAR(128) DEFAULT NULL COMMENT '模型名', `thinking` TINYINT NOT NULL DEFAULT 0 COMMENT '是否开启深度思考:1是0否', `priority` INT NOT NULL DEFAULT 100 COMMENT '优先级(越小越优先)', `status` TINYINT NOT NULL DEFAULT 1 COMMENT '状态:1启用0禁用', `switch_on_quota` TINYINT NOT NULL DEFAULT 1 COMMENT '额度耗尽时是否切换', `switch_on_error` TINYINT NOT NULL DEFAULT 1 COMMENT '故障时是否切换', `cooldown_seconds` INT NOT NULL DEFAULT 300 COMMENT '故障后冷却秒数', `cooldown_until` DATETIME DEFAULT NULL COMMENT '冷却截止时间', `fail_count` INT NOT NULL DEFAULT 0 COMMENT '总失败次数', `success_count` INT NOT NULL DEFAULT 0 COMMENT '总成功次数', `consecutive_fail_count` INT NOT NULL DEFAULT 0 COMMENT '连续失败次数', `last_error` VARCHAR(512) DEFAULT NULL COMMENT '最近错误摘要', `last_used_time` DATETIME DEFAULT NULL COMMENT '最近成功调用时间', `last_fail_time` DATETIME DEFAULT NULL COMMENT '最近失败时间', `create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', `update_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', `memo` VARCHAR(255) DEFAULT NULL COMMENT '备注', PRIMARY KEY (`id`), KEY `idx_sys_llm_route_status_priority` (`status`, `priority`), KEY `idx_sys_llm_route_cooldown` (`cooldown_until`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='LLM 路由配置(多API/多模型/多Key自动切换)'; -- 示例数据(按需修改后执行) -- INSERT INTO sys_llm_route(name, base_url, api_key, model, thinking, priority, status, switch_on_quota, switch_on_error, cooldown_seconds) -- VALUES ('主路由-gpt5', 'https://api.xiaomimimo.com/v1', 'sk-xxxx', 'gpt-5', 1, 10, 1, 1, 1, 300); -- INSERT INTO sys_llm_route(name, base_url, api_key, model, thinking, priority, status, switch_on_quota, switch_on_error, cooldown_seconds) -- VALUES ('备路由-mimo', 
'https://api.xiaomimimo.com/v1', 'sk-yyyy', 'mimo-v2-flash', 0, 20, 1, 1, 1, 300); src/main/webapp/views/ai/llm_config.html
New file @@ -0,0 +1,421 @@ <!DOCTYPE html> <html lang="zh-CN"> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <title>AI配置</title> <link rel="stylesheet" href="../../static/vue/element/element.css" /> <style> body { margin: 0; background: radial-gradient(1200px 500px at 10% -10%, rgba(26, 115, 232, 0.14), transparent 50%), radial-gradient(900px 450px at 100% 0%, rgba(38, 166, 154, 0.11), transparent 55%), #f4f7fb; } .container { max-width: 1640px; margin: 16px auto; padding: 0 14px; } .hero { background: linear-gradient(135deg, #0f4c81 0%, #1f6fb2 45%, #2aa198 100%); color: #fff; border-radius: 14px; padding: 14px 16px; margin-bottom: 10px; box-shadow: 0 10px 28px rgba(23, 70, 110, 0.22); } .hero-top { display: flex; align-items: center; justify-content: space-between; gap: 10px; } .hero-title { display: flex; align-items: center; gap: 10px; } .hero-title .main { font-size: 16px; font-weight: 700; letter-spacing: 0.2px; } .hero-title .sub { font-size: 12px; opacity: 0.9; } .summary-grid { margin-top: 10px; display: grid; grid-template-columns: repeat(5, minmax(0, 1fr)); gap: 8px; } .summary-card { border-radius: 10px; background: rgba(255, 255, 255, 0.16); border: 1px solid rgba(255, 255, 255, 0.24); padding: 8px 10px; min-height: 56px; backdrop-filter: blur(3px); } .summary-card .k { font-size: 11px; opacity: 0.88; } .summary-card .v { margin-top: 4px; font-size: 22px; font-weight: 700; line-height: 1.1; } .table-shell { border-radius: 12px; overflow: hidden; box-shadow: 0 6px 22px rgba(15, 28, 48, 0.08); border: 1px solid #e8edf5; background: #fff; } .mono { font-family: Menlo, Monaco, Consolas, "Liberation Mono", monospace; font-size: 12px; } @media (max-width: 1280px) { .summary-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); } } </style> </head> <body> <div id="app" class="container"> <div class="hero"> <div class="hero-top"> <div class="hero-title"> <div v-html="headerIcon" 
style="display:flex;"></div> <div> <div class="main">AI配置 - LLM路由</div> <div class="sub">支持多API、多模型、多Key,额度耗尽或故障自动切换</div> </div> </div> <div> <el-button type="primary" size="mini" @click="addRoute">新增路由</el-button> <el-button size="mini" @click="loadRoutes">刷新</el-button> </div> </div> <div class="summary-grid"> <div class="summary-card"> <div class="k">总路由</div> <div class="v">{{ summary.total }}</div> </div> <div class="summary-card"> <div class="k">启用</div> <div class="v">{{ summary.enabled }}</div> </div> <div class="summary-card"> <div class="k">故障切换开启</div> <div class="v">{{ summary.errorSwitch }}</div> </div> <div class="summary-card"> <div class="k">额度切换开启</div> <div class="v">{{ summary.quotaSwitch }}</div> </div> <div class="summary-card"> <div class="k">冷却中</div> <div class="v">{{ summary.cooling }}</div> </div> </div> </div> <div class="table-shell"> <el-table :data="routes" stripe height="72vh" v-loading="loading" :header-cell-style="{background:'#f7f9fc', color:'#2e3a4d', fontWeight:600}"> <el-table-column label="名称" width="170"> <template slot-scope="scope"> <el-input v-model="scope.row.name" size="mini"></el-input> </template> </el-table-column> <el-table-column label="Base URL" min-width="220"> <template slot-scope="scope"> <el-input v-model="scope.row.baseUrl" class="mono" size="mini" placeholder="必填,例如: https://api.deepseek.com"></el-input> </template> </el-table-column> <el-table-column label="模型" width="180"> <template slot-scope="scope"> <el-input v-model="scope.row.model" class="mono" size="mini" placeholder="必填,例如: deepseek-chat"></el-input> </template> </el-table-column> <el-table-column label="API Key" min-width="220"> <template slot-scope="scope"> <el-input v-model="scope.row.apiKey" class="mono" type="password" size="mini" placeholder="必填"></el-input> </template> </el-table-column> <el-table-column label="优先级" width="90"> <template slot-scope="scope"> <el-input-number v-model="scope.row.priority" size="mini" :min="0" :max="99999" 
:controls="false" style="width:80px;"></el-input-number> </template> </el-table-column> <el-table-column label="状态" width="70"> <template slot-scope="scope"> <el-switch v-model="scope.row.status" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="思考" width="70"> <template slot-scope="scope"> <el-switch v-model="scope.row.thinking" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="额度切换" width="90"> <template slot-scope="scope"> <el-switch v-model="scope.row.switchOnQuota" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="故障切换" width="90"> <template slot-scope="scope"> <el-switch v-model="scope.row.switchOnError" :active-value="1" :inactive-value="0"></el-switch> </template> </el-table-column> <el-table-column label="冷却秒数" width="100"> <template slot-scope="scope"> <el-input-number v-model="scope.row.cooldownSeconds" size="mini" :min="0" :max="86400" :controls="false" style="width:90px;"></el-input-number> </template> </el-table-column> <el-table-column label="统计" min-width="220"> <template slot-scope="scope"> <div>成功: {{ scope.row.successCount || 0 }} / 失败: {{ scope.row.failCount || 0 }} / 连续失败: {{ scope.row.consecutiveFailCount || 0 }}</div> <div style="color:#909399;">冷却到: {{ scope.row.cooldownUntil || '-' }}</div> <div style="color:#909399;">最近错误: {{ scope.row.lastError || '-' }}</div> </template> </el-table-column> <el-table-column label="操作" width="120" fixed="right" align="center"> <template slot-scope="scope"> <el-dropdown trigger="click" @command="function(cmd){ handleRouteCommand(cmd, scope.row, scope.$index); }"> <el-button size="mini" type="primary" plain> 操作<i class="el-icon-arrow-down el-icon--right"></i> </el-button> <el-dropdown-menu slot="dropdown"> <el-dropdown-item command="test" :disabled="scope.row.__testing === true"> {{ scope.row.__testing === true ? '测试中...' 
: '测试' }} </el-dropdown-item> <el-dropdown-item command="save">保存</el-dropdown-item> <el-dropdown-item command="cooldown">清冷却</el-dropdown-item> <el-dropdown-item command="delete" divided>删除</el-dropdown-item> </el-dropdown-menu> </el-dropdown> </template> </el-table-column> </el-table> </div> </div> <script type="text/javascript" src="../../static/vue/js/vue.min.js"></script> <script type="text/javascript" src="../../static/vue/element/element.js"></script> <script type="text/javascript" src="../../static/js/common.js" charset="utf-8"></script> <script> new Vue({ el: '#app', data: function() { return { headerIcon: getAiIconHtml(34, 34), loading: false, routes: [] }; }, computed: { summary: function() { var now = Date.now(); var total = this.routes.length; var enabled = 0, quotaSwitch = 0, errorSwitch = 0, cooling = 0; for (var i = 0; i < this.routes.length; i++) { var x = this.routes[i]; if (x.status === 1) enabled++; if (x.switchOnQuota === 1) quotaSwitch++; if (x.switchOnError === 1) errorSwitch++; if (x.cooldownUntil && new Date(x.cooldownUntil).getTime() > now) cooling++; } return { total: total, enabled: enabled, quotaSwitch: quotaSwitch, errorSwitch: errorSwitch, cooling: cooling }; } }, methods: { authHeaders: function() { return { 'token': localStorage.getItem('token') }; }, handleRouteCommand: function(command, route, idx) { if (command === 'test') return this.testRoute(route); if (command === 'save') return this.saveRoute(route); if (command === 'cooldown') return this.clearCooldown(route); if (command === 'delete') return this.deleteRoute(route, idx); }, loadRoutes: function() { var self = this; self.loading = true; fetch(baseUrl + '/ai/llm/config/list/auth', { headers: self.authHeaders() }) .then(function(r){ return r.json(); }) .then(function(res){ self.loading = false; if (res && res.code === 200) { self.routes = Array.isArray(res.data) ? res.data : []; } else { self.$message.error((res && res.msg) ? 
res.msg : '加载失败'); } }) .catch(function(){ self.loading = false; self.$message.error('加载失败'); }); }, addRoute: function() { this.routes.unshift({ id: null, name: '', baseUrl: '', apiKey: '', model: '', thinking: 0, priority: 100, status: 1, switchOnQuota: 1, switchOnError: 1, cooldownSeconds: 300, successCount: 0, failCount: 0, consecutiveFailCount: 0, cooldownUntil: null, lastError: null }); }, buildPayload: function(route) { return { id: route.id, name: route.name, baseUrl: route.baseUrl, apiKey: route.apiKey, model: route.model, thinking: route.thinking, priority: route.priority, status: route.status, switchOnQuota: route.switchOnQuota, switchOnError: route.switchOnError, cooldownSeconds: route.cooldownSeconds, memo: route.memo }; }, saveRoute: function(route) { var self = this; fetch(baseUrl + '/ai/llm/config/save/auth', { method: 'POST', headers: Object.assign({ 'Content-Type': 'application/json' }, self.authHeaders()), body: JSON.stringify(self.buildPayload(route)) }) .then(function(r){ return r.json(); }) .then(function(res){ if (res && res.code === 200) { self.$message.success('保存成功'); self.loadRoutes(); } else { self.$message.error((res && res.msg) ? res.msg : '保存失败'); } }) .catch(function(){ self.$message.error('保存失败'); }); }, deleteRoute: function(route, idx) { var self = this; if (!route.id) { self.routes.splice(idx, 1); return; } self.$confirm('确定删除该路由吗?', '提示', { type: 'warning' }).then(function() { fetch(baseUrl + '/ai/llm/config/delete/auth?id=' + encodeURIComponent(route.id), { method: 'POST', headers: self.authHeaders() }) .then(function(r){ return r.json(); }) .then(function(res){ if (res && res.code === 200) { self.$message.success('删除成功'); self.loadRoutes(); } else { self.$message.error((res && res.msg) ? 
res.msg : '删除失败'); } }) .catch(function(){ self.$message.error('删除失败'); }); }).catch(function(){}); }, clearCooldown: function(route) { var self = this; if (!route.id) return; fetch(baseUrl + '/ai/llm/config/clearCooldown/auth?id=' + encodeURIComponent(route.id), { method: 'POST', headers: self.authHeaders() }) .then(function(r){ return r.json(); }) .then(function(res){ if (res && res.code === 200) { self.$message.success('已清除冷却'); self.loadRoutes(); } else { self.$message.error((res && res.msg) ? res.msg : '操作失败'); } }) .catch(function(){ self.$message.error('操作失败'); }); }, testRoute: function(route) { var self = this; if (route.__testing === true) return; if (!route.id) { self.$message.warning('当前是未保存配置,测试通过后仍需先保存才会生效'); } self.$set(route, '__testing', true); fetch(baseUrl + '/ai/llm/config/test/auth', { method: 'POST', headers: Object.assign({ 'Content-Type': 'application/json' }, self.authHeaders()), body: JSON.stringify(self.buildPayload(route)) }) .then(function(r){ return r.json(); }) .then(function(res){ if (!res || res.code !== 200) { self.$message.error((res && res.msg) ? res.msg : '测试失败'); return; } var data = res.data || {}; var ok = data.ok === true; var title = ok ? '测试成功' : '测试失败'; var msg = '' + '路由: ' + (route.name || '-') + '\n' + 'Base URL: ' + (route.baseUrl || '-') + '\n' + '状态码: ' + (data.statusCode != null ? data.statusCode : '-') + '\n' + '耗时: ' + (data.latencyMs != null ? data.latencyMs : '-') + ' ms\n' + '结果: ' + (data.message || '-') + '\n' + '返回片段: ' + (data.responseSnippet || '-'); self.$alert(msg, title, { confirmButtonText: '确定', type: ok ? 'success' : 'error' }); }) .catch(function(){ self.$message.error('测试失败'); }) .finally(function(){ self.$set(route, '__testing', false); }); } }, mounted: function() { this.loadRoutes(); } }); </script> </body> </html>