From 825813e2dd90cf8bdc48acbb6eee85159bc33b4d Mon Sep 17 00:00:00 2001
From: Junjie <fallin.jie@qq.com>
Date: 星期二, 03 三月 2026 13:04:28 +0800
Subject: [PATCH] #AI LLM路由

---
 src/main/java/com/zy/ai/service/LlmChatService.java |  185 ++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 171 insertions(+), 14 deletions(-)

diff --git a/src/main/java/com/zy/ai/service/LlmChatService.java b/src/main/java/com/zy/ai/service/LlmChatService.java
index 4e6bf19..3e25561 100644
--- a/src/main/java/com/zy/ai/service/LlmChatService.java
+++ b/src/main/java/com/zy/ai/service/LlmChatService.java
@@ -5,6 +5,7 @@
 import com.alibaba.fastjson.JSONObject;
 import com.zy.ai.entity.ChatCompletionRequest;
 import com.zy.ai.entity.ChatCompletionResponse;
+import com.zy.ai.entity.LlmCallLog;
 import com.zy.ai.entity.LlmRouteConfig;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
@@ -16,7 +17,9 @@
 import reactor.core.publisher.Flux;
 
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.List;
+import java.util.UUID;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -27,7 +30,10 @@
 @RequiredArgsConstructor
 public class LlmChatService {
 
+    private static final int LOG_TEXT_LIMIT = 16000; // max chars kept for request/response text written to the call log
+
     private final LlmRoutingService llmRoutingService;
+    private final LlmCallLogService llmCallLogService;
 
     @Value("${llm.base-url:}")
     private String fallbackBaseUrl;
@@ -54,7 +60,7 @@
         req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
         req.setStream(false);
 
-        ChatCompletionResponse response = complete(req);
+        ChatCompletionResponse response = complete(req, "chat");
 
         if (response == null ||
                 response.getChoices() == null ||
@@ -81,13 +87,20 @@
             req.setTools(tools);
             req.setTool_choice("auto");
         }
-        return complete(req);
+        return complete(req, tools != null && !tools.isEmpty() ? "chat_completion_tools" : "chat_completion");
     }
 
     public ChatCompletionResponse complete(ChatCompletionRequest req) {
+        return complete(req, "completion"); // delegate to the scene-aware overload; "completion" is the scene tag passed on for call logging
+    }
+
+    private ChatCompletionResponse complete(ChatCompletionRequest req, String scene) {
+        String traceId = nextTraceId();
         List<ResolvedRoute> routes = resolveRoutes();
         if (routes.isEmpty()) {
             log.error("璋冪敤 LLM 澶辫触: 鏈厤缃彲鐢� LLM 璺敱");
+            recordCall(traceId, scene, false, 1, null, false, null, 0L, req, null, "none",
+                    new RuntimeException("鏈厤缃彲鐢� LLM 璺敱"), "no_route");
             return null;
         }
 
@@ -95,19 +108,39 @@
         for (int i = 0; i < routes.size(); i++) {
             ResolvedRoute route = routes.get(i);
             boolean hasNext = i < routes.size() - 1;
+            ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false);
+            long start = System.currentTimeMillis();
             try {
-                ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false);
-                ChatCompletionResponse resp = callCompletion(route, routeReq);
+                CompletionCallResult callResult = callCompletion(route, routeReq);
+                ChatCompletionResponse resp = callResult.response;
                 if (!isValidCompletion(resp)) {
-                    throw new RuntimeException("LLM 鍝嶅簲涓虹┖");
+                    RuntimeException ex = new RuntimeException("LLM 鍝嶅簲涓虹┖");
+                    boolean canSwitch = shouldSwitch(route, false);
+                    markFailure(route, ex, canSwitch);
+                    recordCall(traceId, scene, false, i + 1, route, false, callResult.statusCode,
+                            System.currentTimeMillis() - start, routeReq, callResult.payload, "error", ex,
+                            "invalid_completion");
+                    if (hasNext && canSwitch) {
+                        log.warn("LLM 鍒囨崲鍒颁笅涓�璺敱, current={}, reason={}", route.tag(), ex.getMessage());
+                        continue;
+                    }
+                    log.error("璋冪敤 LLM 澶辫触, route={}", route.tag(), ex);
+                    last = ex;
+                    break;
                 }
                 markSuccess(route);
+                recordCall(traceId, scene, false, i + 1, route, true, callResult.statusCode,
+                        System.currentTimeMillis() - start, routeReq, buildResponseText(resp, callResult.payload),
+                        "none", null, null);
                 return resp;
             } catch (Throwable ex) {
                 last = ex;
                 boolean quota = isQuotaExhausted(ex);
                 boolean canSwitch = shouldSwitch(route, quota);
                 markFailure(route, ex, canSwitch);
+                recordCall(traceId, scene, false, i + 1, route, false, statusCodeOf(ex),
+                        System.currentTimeMillis() - start, routeReq, responseBodyOf(ex),
+                        quota ? "quota" : "error", ex, null);
                 if (hasNext && canSwitch) {
                     log.warn("LLM 鍒囨崲鍒颁笅涓�璺敱, current={}, reason={}", route.tag(), errorText(ex));
                     continue;
@@ -136,7 +169,7 @@
         req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
         req.setStream(true);
 
-        streamWithFailover(req, onChunk, onComplete, onError);
+        streamWithFailover(req, onChunk, onComplete, onError, "chat_stream");
     }
 
     public void chatStreamWithTools(List<ChatCompletionRequest.Message> messages,
@@ -155,19 +188,23 @@
             req.setTools(tools);
             req.setTool_choice("auto");
         }
-        streamWithFailover(req, onChunk, onComplete, onError);
+        streamWithFailover(req, onChunk, onComplete, onError, tools != null && !tools.isEmpty() ? "chat_stream_tools" : "chat_stream");
     }
 
     private void streamWithFailover(ChatCompletionRequest req,
                                     Consumer<String> onChunk,
                                     Runnable onComplete,
-                                    Consumer<Throwable> onError) {
+                                    Consumer<Throwable> onError,
+                                    String scene) { // scene: caller-supplied tag stored with every call-log row
+        String traceId = nextTraceId(); // one trace id for all route attempts of this stream request
         List<ResolvedRoute> routes = resolveRoutes();
         if (routes.isEmpty()) {
+            recordCall(traceId, scene, true, 1, null, false, null, 0L, req, null, "none",
+                    new RuntimeException("鏈厤缃彲鐢� LLM 璺敱"), "no_route"); // log the no-route failure before notifying onError
             if (onError != null) onError.accept(new RuntimeException("鏈厤缃彲鐢� LLM 璺敱"));
             return;
         }
-        attemptStream(routes, 0, req, onChunk, onComplete, onError);
+        attemptStream(routes, 0, req, onChunk, onComplete, onError, traceId, scene); // start with the first route (index 0)
     }
 
     private void attemptStream(List<ResolvedRoute> routes,
@@ -175,7 +212,9 @@
                                ChatCompletionRequest req,
                                Consumer<String> onChunk,
                                Runnable onComplete,
-                               Consumer<Throwable> onError) {
+                               Consumer<Throwable> onError,
+                               String traceId,
+                               String scene) {
         if (index >= routes.size()) {
             if (onError != null) onError.accept(new RuntimeException("LLM 璺敱鍏ㄩ儴澶辫触"));
             return;
@@ -183,6 +222,8 @@
 
         ResolvedRoute route = routes.get(index);
         ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, true);
+        long start = System.currentTimeMillis();
+        StringBuilder outputBuffer = new StringBuilder();
 
         AtomicBoolean doneSeen = new AtomicBoolean(false);
         AtomicBoolean errorSeen = new AtomicBoolean(false);
@@ -240,6 +281,7 @@
                             String content = delta.getString("content");
                             if (content != null) {
                                 queue.offer(content);
+                                appendLimited(outputBuffer, content);
                             }
                         }
                     }
@@ -253,9 +295,12 @@
             boolean quota = isQuotaExhausted(err);
             boolean canSwitch = shouldSwitch(route, quota);
             markFailure(route, err, canSwitch);
+            recordCall(traceId, scene, true, index + 1, route, false, statusCodeOf(err),
+                    System.currentTimeMillis() - start, routeReq, outputBuffer.toString(),
+                    quota ? "quota" : "error", err, "emitted=" + emitted.get());
             if (!emitted.get() && canSwitch && index < routes.size() - 1) {
                 log.warn("LLM 璺敱澶辫触锛岃嚜鍔ㄥ垏鎹紝current={}, reason={}", route.tag(), errorText(err));
-                attemptStream(routes, index + 1, req, onChunk, onComplete, onError);
+                attemptStream(routes, index + 1, req, onChunk, onComplete, onError, traceId, scene);
                 return;
             }
             if (onError != null) onError.accept(err);
@@ -266,14 +311,20 @@
                 doneSeen.set(true);
                 boolean canSwitch = shouldSwitch(route, false);
                 markFailure(route, ex, canSwitch);
+                recordCall(traceId, scene, true, index + 1, route, false, 200,
+                        System.currentTimeMillis() - start, routeReq, outputBuffer.toString(),
+                        "error", ex, "unexpected_stream_end");
                 if (!emitted.get() && canSwitch && index < routes.size() - 1) {
                     log.warn("LLM 璺敱娴佸紓甯稿畬鎴愶紝鑷姩鍒囨崲锛宑urrent={}", route.tag());
-                    attemptStream(routes, index + 1, req, onChunk, onComplete, onError);
+                    attemptStream(routes, index + 1, req, onChunk, onComplete, onError, traceId, scene);
                 } else {
                     if (onError != null) onError.accept(ex);
                 }
             } else {
                 markSuccess(route);
+                recordCall(traceId, scene, true, index + 1, route, true, 200,
+                        System.currentTimeMillis() - start, routeReq, outputBuffer.toString(),
+                        "none", null, null);
                 doneSeen.set(true);
             }
         });
@@ -299,7 +350,7 @@
                 .doOnError(ex -> log.error("璋冪敤 LLM 娴佸紡澶辫触, route={}", route.tag(), ex));
     }
 
-    private ChatCompletionResponse callCompletion(ResolvedRoute route, ChatCompletionRequest req) {
+    private CompletionCallResult callCompletion(ResolvedRoute route, ChatCompletionRequest req) {
         WebClient client = WebClient.builder().baseUrl(route.baseUrl).build();
         RawCompletionResult raw = client.post()
                 .uri("/chat/completions")
@@ -318,7 +369,7 @@
         if (raw.statusCode < 200 || raw.statusCode >= 300) {
             throw new LlmRouteException(raw.statusCode, raw.payload);
         }
-        return parseCompletion(raw.payload);
+        return new CompletionCallResult(raw.statusCode, raw.payload, parseCompletion(raw.payload));
     }
 
     private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) {
@@ -517,6 +568,112 @@
         return r;
     }
 
+    private String nextTraceId() { // builds the id that ties together all call-log rows of one logical LLM call
+        return UUID.randomUUID().toString().replace("-", ""); // random UUID with the dashes stripped (32 hex chars)
+    }
+
+    private void appendLimited(StringBuilder sb, String text) { // appends text to sb without letting sb grow past LOG_TEXT_LIMIT chars
+        if (sb == null || text == null || text.isEmpty()) {
+            return; // nothing to append
+        }
+        int remain = LOG_TEXT_LIMIT - sb.length(); // room left before the cap is reached
+        if (remain <= 0) {
+            return; // buffer already at the cap; further text is dropped
+        }
+        if (text.length() <= remain) {
+            sb.append(text);
+        } else {
+            sb.append(text, 0, remain); // keep only the prefix that still fits
+        }
+    }
+
+    private Integer statusCodeOf(Throwable ex) { // returns the statusCode carried by an LlmRouteException, otherwise null
+        if (ex instanceof LlmRouteException) {
+            return ((LlmRouteException) ex).statusCode;
+        }
+        return null; // any other throwable type: no status is extracted
+    }
+
+    private String responseBodyOf(Throwable ex) { // returns the body carried by an LlmRouteException, otherwise null
+        if (ex instanceof LlmRouteException) {
+            return cut(((LlmRouteException) ex).body, LOG_TEXT_LIMIT); // truncated to the log text cap
+        }
+        return null; // any other throwable type: no body is extracted
+    }
+
+    private String buildResponseText(ChatCompletionResponse resp, String fallbackPayload) { // picks the text stored as the logged response
+        if (resp != null && resp.getChoices() != null && !resp.getChoices().isEmpty()
+                && resp.getChoices().get(0) != null && resp.getChoices().get(0).getMessage() != null) {
+            ChatCompletionRequest.Message m = resp.getChoices().get(0).getMessage(); // only the first choice is inspected
+            if (!isBlank(m.getContent())) {
+                return cut(m.getContent(), LOG_TEXT_LIMIT); // first preference: the message content
+            }
+            if (m.getTool_calls() != null && !m.getTool_calls().isEmpty()) {
+                return cut(JSON.toJSONString(m), LOG_TEXT_LIMIT); // blank content but tool calls present: log the whole message as JSON
+            }
+        }
+        return cut(fallbackPayload, LOG_TEXT_LIMIT); // otherwise fall back to the supplied payload
+    }
+
+    private String safeName(Throwable ex) { // null-safe simple class name of the throwable
+        return ex == null ? null : ex.getClass().getSimpleName();
+    }
+
+    private String cut(String text, int maxLen) { // null-safe: normalizes carriage returns, then truncates to maxLen chars
+        if (text == null) return null;
+        String clean = text.replace("\r", " "); // every CR becomes a single space; LF is left untouched
+        return clean.length() > maxLen ? clean.substring(0, maxLen) : clean; // the length check runs on the cleaned text
+    }
+
+    /**
+     * Persists one audit row for a single LLM route attempt. Best-effort: any Exception
+     * raised while building or saving the row is logged and swallowed. route may be null
+     * (no route resolved); err is null on success. Text fields are truncated via cut(...).
+     */
+    private void recordCall(String traceId, String scene, boolean stream, int attemptNo,
+                            ResolvedRoute route, boolean success, Integer httpStatus, long latencyMs,
+                            ChatCompletionRequest req, String response, String switchMode,
+                            Throwable err, String extra) {
+        try {
+            LlmCallLog item = new LlmCallLog();
+            item.setTraceId(cut(traceId, 64));
+            item.setScene(cut(scene, 64));
+            item.setStream((short) (stream ? 1 : 0));
+            item.setAttemptNo(attemptNo);
+            if (route != null) {
+                item.setRouteId(route.id);
+                item.setRouteName(cut(route.name, 128));
+                item.setBaseUrl(cut(route.baseUrl, 255));
+                item.setModel(cut(route.model, 128));
+            }
+            item.setSuccess((short) (success ? 1 : 0));
+            item.setHttpStatus(httpStatus);
+            item.setLatencyMs(latencyMs < 0 ? 0 : latencyMs);
+            item.setSwitchMode(cut(switchMode, 32));
+            item.setRequestContent(cut(JSON.toJSONString(req), LOG_TEXT_LIMIT));
+            item.setResponseContent(cut(response, LOG_TEXT_LIMIT));
+            item.setErrorType(cut(safeName(err), 128));
+            item.setErrorMessage(err == null ? null : cut(errorText(err), 1024));
+            item.setExtra(cut(extra, 512));
+            item.setCreateTime(new Date());
+            llmCallLogService.saveIgnoreError(item);
+        } catch (Exception ex) { // audit logging must never fail the LLM call or trigger a bogus route failover
+            log.warn("record LLM call log failed, traceId={}, scene={}", traceId, scene, ex);
+        }
+    }
+
+    private static class CompletionCallResult { // holder with final fields for the outcome of one non-streaming completion call
+        private final int statusCode; // status code of the raw result this was built from
+        private final String payload; // raw response payload text
+        private final ChatCompletionResponse response; // response object parsed from the payload
+
+        private CompletionCallResult(int statusCode, String payload, ChatCompletionResponse response) {
+            this.statusCode = statusCode;
+            this.payload = payload;
+            this.response = response;
+        }
+    }
+
     private static class RawCompletionResult {
         private final int statusCode;
         private final String payload;

--
Gitblit v1.9.1