From d8d82ceda75fa17972d6996f83078367a19c1730 Mon Sep 17 00:00:00 2001
From: Junjie <fallin.jie@qq.com>
Date: 星期四, 12 三月 2026 15:38:32 +0800
Subject: [PATCH] #

---
 src/main/webapp/views/ai/diagnosis.html                          |   46 +++
 src/main/java/com/zy/ai/mapper/AiChatMessageMapper.java          |   11 
 src/main/java/com/zy/ai/service/LlmSpringAiClientService.java    |   35 +
 src/main/resources/sql/20260312_create_sys_ai_chat_storage.sql   |   35 ++
 src/main/java/com/zy/ai/entity/AiChatMessage.java                |   33 ++
 src/main/webapp/views/ai/llm_config.html                         |   28 ++
 src/main/java/com/zy/ai/service/LlmChatService.java              |   77 ++++-
 src/main/java/com/zy/ai/service/impl/AiChatStoreServiceImpl.java |  196 +++++++++++++++
 src/main/java/com/zy/ai/entity/AiChatSession.java                |   73 +++++
 src/main/java/com/zy/ai/service/WcsDiagnosisService.java         |  176 ++++++-------
 src/main/java/com/zy/core/enums/RedisKeyType.java                |    2 
 src/main/java/com/zy/ai/service/AiChatStoreService.java          |   26 ++
 src/main/java/com/zy/ai/mapper/AiChatSessionMapper.java          |   11 
 13 files changed, 621 insertions(+), 128 deletions(-)

diff --git a/src/main/java/com/zy/ai/entity/AiChatMessage.java b/src/main/java/com/zy/ai/entity/AiChatMessage.java
new file mode 100644
index 0000000..8483823
--- /dev/null
+++ b/src/main/java/com/zy/ai/entity/AiChatMessage.java
@@ -0,0 +1,33 @@
+package com.zy.ai.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.util.Date;
+
+@Data
+@TableName("sys_ai_chat_message")
+public class AiChatMessage implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    @TableId(value = "id", type = IdType.AUTO)
+    private Long id;
+
+    @TableField("chat_id")
+    private String chatId;
+
+    @TableField("seq_no")
+    private Integer seqNo;
+
+    private String role;
+
+    private String content;
+
+    @TableField("create_time")
+    private Date createTime;
+}
diff --git a/src/main/java/com/zy/ai/entity/AiChatSession.java b/src/main/java/com/zy/ai/entity/AiChatSession.java
new file mode 100644
index 0000000..eb3dafd
--- /dev/null
+++ b/src/main/java/com/zy/ai/entity/AiChatSession.java
@@ -0,0 +1,73 @@
+package com.zy.ai.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.util.Date;
+
+@Data
+@TableName("sys_ai_chat_session")
+public class AiChatSession implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    @TableId(value = "id", type = IdType.AUTO)
+    private Long id;
+
+    @TableField("chat_id")
+    private String chatId;
+
+    private String title;
+
+    @TableField("prompt_template_id")
+    private Long promptTemplateId;
+
+    @TableField("prompt_scene_code")
+    private String promptSceneCode;
+
+    @TableField("prompt_version")
+    private Integer promptVersion;
+
+    @TableField("prompt_name")
+    private String promptName;
+
+    @TableField("message_count")
+    private Integer messageCount;
+
+    @TableField("last_prompt_tokens")
+    private Long lastPromptTokens;
+
+    @TableField("last_completion_tokens")
+    private Long lastCompletionTokens;
+
+    @TableField("last_total_tokens")
+    private Long lastTotalTokens;
+
+    @TableField("last_llm_call_count")
+    private Integer lastLlmCallCount;
+
+    @TableField("last_token_updated_at")
+    private Date lastTokenUpdatedAt;
+
+    @TableField("sum_prompt_tokens")
+    private Long sumPromptTokens;
+
+    @TableField("sum_completion_tokens")
+    private Long sumCompletionTokens;
+
+    @TableField("sum_total_tokens")
+    private Long sumTotalTokens;
+
+    @TableField("ask_count")
+    private Long askCount;
+
+    @TableField("create_time")
+    private Date createTime;
+
+    @TableField("update_time")
+    private Date updateTime;
+}
diff --git a/src/main/java/com/zy/ai/mapper/AiChatMessageMapper.java b/src/main/java/com/zy/ai/mapper/AiChatMessageMapper.java
new file mode 100644
index 0000000..74312f6
--- /dev/null
+++ b/src/main/java/com/zy/ai/mapper/AiChatMessageMapper.java
@@ -0,0 +1,11 @@
+package com.zy.ai.mapper;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.zy.ai.entity.AiChatMessage;
+import org.apache.ibatis.annotations.Mapper;
+import org.springframework.stereotype.Repository;
+
+@Mapper
+@Repository
+public interface AiChatMessageMapper extends BaseMapper<AiChatMessage> {
+}
diff --git a/src/main/java/com/zy/ai/mapper/AiChatSessionMapper.java b/src/main/java/com/zy/ai/mapper/AiChatSessionMapper.java
new file mode 100644
index 0000000..4fd3307
--- /dev/null
+++ b/src/main/java/com/zy/ai/mapper/AiChatSessionMapper.java
@@ -0,0 +1,11 @@
+package com.zy.ai.mapper;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.zy.ai.entity.AiChatSession;
+import org.apache.ibatis.annotations.Mapper;
+import org.springframework.stereotype.Repository;
+
+@Mapper
+@Repository
+public interface AiChatSessionMapper extends BaseMapper<AiChatSession> {
+}
diff --git a/src/main/java/com/zy/ai/service/AiChatStoreService.java b/src/main/java/com/zy/ai/service/AiChatStoreService.java
new file mode 100644
index 0000000..200600f
--- /dev/null
+++ b/src/main/java/com/zy/ai/service/AiChatStoreService.java
@@ -0,0 +1,26 @@
+package com.zy.ai.service;
+
+import com.zy.ai.entity.AiPromptTemplate;
+import com.zy.ai.entity.ChatCompletionRequest;
+
+import java.util.List;
+import java.util.Map;
+
+public interface AiChatStoreService {
+
+    List<Map<String, Object>> listChats();
+
+    boolean deleteChat(String chatId);
+
+    List<ChatCompletionRequest.Message> getChatHistory(String chatId);
+
+    void saveConversation(String chatId,
+                          String title,
+                          ChatCompletionRequest.Message userMessage,
+                          ChatCompletionRequest.Message assistantMessage,
+                          AiPromptTemplate promptTemplate,
+                          long promptTokens,
+                          long completionTokens,
+                          long totalTokens,
+                          int llmCallCount);
+}
diff --git a/src/main/java/com/zy/ai/service/LlmChatService.java b/src/main/java/com/zy/ai/service/LlmChatService.java
index e2eddd6..a3835f5 100644
--- a/src/main/java/com/zy/ai/service/LlmChatService.java
+++ b/src/main/java/com/zy/ai/service/LlmChatService.java
@@ -22,6 +22,7 @@
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Consumer;
 
 @Slf4j
@@ -99,7 +100,7 @@
         List<ResolvedRoute> routes = resolveRoutes();
         if (routes.isEmpty()) {
             log.error("璋冪敤 LLM 澶辫触: 鏈厤缃彲鐢� LLM 璺敱");
-            recordCall(traceId, scene, false, 1, null, false, null, 0L, req, null, "none",
+            recordCall(traceId, scene, false, 1, null, false, null, 0L, req, null, null, "none",
                     new RuntimeException("鏈厤缃彲鐢� LLM 璺敱"), "no_route");
             return null;
         }
@@ -118,7 +119,7 @@
                     boolean canSwitch = shouldSwitch(route, false);
                     markFailure(route, ex, canSwitch);
                     recordCall(traceId, scene, false, i + 1, route, false, callResult.statusCode,
-                            System.currentTimeMillis() - start, routeReq, callResult.payload, "error", ex,
+                            System.currentTimeMillis() - start, routeReq, resp, callResult.payload, "error", ex,
                             "invalid_completion");
                     if (hasNext && canSwitch) {
                         log.warn("LLM 鍒囨崲鍒颁笅涓�璺敱, current={}, reason={}", route.tag(), ex.getMessage());
@@ -130,7 +131,7 @@
                 }
                 markSuccess(route);
                 recordCall(traceId, scene, false, i + 1, route, true, callResult.statusCode,
-                        System.currentTimeMillis() - start, routeReq, buildResponseText(resp, callResult.payload),
+                        System.currentTimeMillis() - start, routeReq, resp, buildResponseText(resp, callResult.payload),
                         "none", null, null);
                 return resp;
             } catch (Throwable ex) {
@@ -139,7 +140,7 @@
                 boolean canSwitch = shouldSwitch(route, quota);
                 markFailure(route, ex, canSwitch);
                 recordCall(traceId, scene, false, i + 1, route, false, statusCodeOf(ex),
-                        System.currentTimeMillis() - start, routeReq, responseBodyOf(ex),
+                        System.currentTimeMillis() - start, routeReq, null, responseBodyOf(ex),
                         quota ? "quota" : "error", ex, null);
                 if (hasNext && canSwitch) {
                     log.warn("LLM 鍒囨崲鍒颁笅涓�璺敱, current={}, reason={}", route.tag(), errorText(ex));
@@ -169,7 +170,7 @@
         req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
         req.setStream(true);
 
-        streamWithFailover(req, onChunk, onComplete, onError, "chat_stream");
+        streamWithFailover(req, onChunk, onComplete, onError, null, "chat_stream");
     }
 
     public void chatStreamWithTools(List<ChatCompletionRequest.Message> messages,
@@ -178,7 +179,8 @@
                                     List<Object> tools,
                                     Consumer<String> onChunk,
                                     Runnable onComplete,
-                                    Consumer<Throwable> onError) {
+                                    Consumer<Throwable> onError,
+                                    Consumer<ChatCompletionResponse.Usage> onUsage) {
         ChatCompletionRequest req = new ChatCompletionRequest();
         req.setMessages(messages);
         req.setTemperature(temperature != null ? temperature : 0.3);
@@ -188,23 +190,24 @@
             req.setTools(tools);
             req.setTool_choice("auto");
         }
-        streamWithFailover(req, onChunk, onComplete, onError, tools != null && !tools.isEmpty() ? "chat_stream_tools" : "chat_stream");
+        streamWithFailover(req, onChunk, onComplete, onError, onUsage, tools != null && !tools.isEmpty() ? "chat_stream_tools" : "chat_stream");
     }
 
     private void streamWithFailover(ChatCompletionRequest req,
                                     Consumer<String> onChunk,
                                     Runnable onComplete,
                                     Consumer<Throwable> onError,
+                                    Consumer<ChatCompletionResponse.Usage> onUsage,
                                     String scene) {
         String traceId = nextTraceId();
         List<ResolvedRoute> routes = resolveRoutes();
         if (routes.isEmpty()) {
-            recordCall(traceId, scene, true, 1, null, false, null, 0L, req, null, "none",
+            recordCall(traceId, scene, true, 1, null, false, null, 0L, req, null, null, "none",
                     new RuntimeException("鏈厤缃彲鐢� LLM 璺敱"), "no_route");
             if (onError != null) onError.accept(new RuntimeException("鏈厤缃彲鐢� LLM 璺敱"));
             return;
         }
-        attemptStream(routes, 0, req, onChunk, onComplete, onError, traceId, scene);
+        attemptStream(routes, 0, req, onChunk, onComplete, onError, onUsage, traceId, scene);
     }
 
     private void attemptStream(List<ResolvedRoute> routes,
@@ -213,6 +216,7 @@
                                Consumer<String> onChunk,
                                Runnable onComplete,
                                Consumer<Throwable> onError,
+                               Consumer<ChatCompletionResponse.Usage> onUsage,
                                String traceId,
                                String scene) {
         if (index >= routes.size()) {
@@ -228,6 +232,7 @@
         AtomicBoolean doneSeen = new AtomicBoolean(false);
         AtomicBoolean errorSeen = new AtomicBoolean(false);
         AtomicBoolean emitted = new AtomicBoolean(false);
+        AtomicReference<ChatCompletionResponse.Usage> usageRef = new AtomicReference<>();
         LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
 
         Thread drain = new Thread(() -> {
@@ -257,7 +262,7 @@
         drain.setDaemon(true);
         drain.start();
 
-        Flux<String> streamSource = streamFluxWithSpringAi(route, routeReq);
+        Flux<String> streamSource = streamFluxWithSpringAi(route, routeReq, usageRef::set);
         streamSource.subscribe(payload -> {
             if (payload == null || payload.isEmpty()) return;
             queue.offer(payload);
@@ -269,25 +274,33 @@
             boolean canSwitch = shouldSwitch(route, quota);
             markFailure(route, err, canSwitch);
             recordCall(traceId, scene, true, index + 1, route, false, statusCodeOf(err),
-                    System.currentTimeMillis() - start, routeReq, outputBuffer.toString(),
+                    System.currentTimeMillis() - start, routeReq, usageResponse(usageRef.get()), outputBuffer.toString(),
                     quota ? "quota" : "error", err, "emitted=" + emitted.get());
             if (!emitted.get() && canSwitch && index < routes.size() - 1) {
                 log.warn("LLM 璺敱澶辫触锛岃嚜鍔ㄥ垏鎹紝current={}, reason={}", route.tag(), errorText(err));
-                attemptStream(routes, index + 1, req, onChunk, onComplete, onError, traceId, scene);
+                attemptStream(routes, index + 1, req, onChunk, onComplete, onError, onUsage, traceId, scene);
                 return;
             }
             if (onError != null) onError.accept(err);
         }, () -> {
             markSuccess(route);
+            if (onUsage != null && usageRef.get() != null) {
+                try {
+                    onUsage.accept(usageRef.get());
+                } catch (Exception ignore) {
+                }
+            }
             recordCall(traceId, scene, true, index + 1, route, true, 200,
-                    System.currentTimeMillis() - start, routeReq, outputBuffer.toString(),
+                    System.currentTimeMillis() - start, routeReq, usageResponse(usageRef.get()), outputBuffer.toString(),
                     "none", null, null);
             doneSeen.set(true);
         });
     }
 
-    private Flux<String> streamFluxWithSpringAi(ResolvedRoute route, ChatCompletionRequest req) {
-        return llmSpringAiClientService.streamCompletion(route.baseUrl, route.apiKey, req)
+    private Flux<String> streamFluxWithSpringAi(ResolvedRoute route,
+                                                ChatCompletionRequest req,
+                                                Consumer<ChatCompletionResponse.Usage> usageConsumer) {
+        return llmSpringAiClientService.streamCompletion(route.baseUrl, route.apiKey, req, usageConsumer)
                 .doOnError(ex -> log.error("璋冪敤 Spring AI 娴佸紡澶辫触, route={}", route.tag(), ex));
     }
 
@@ -491,6 +504,7 @@
                             Integer httpStatus,
                             long latencyMs,
                             ChatCompletionRequest req,
+                            ChatCompletionResponse responseObj,
                             String response,
                             String switchMode,
                             Throwable err,
@@ -514,11 +528,42 @@
         item.setResponseContent(cut(response, LOG_TEXT_LIMIT));
         item.setErrorType(cut(safeName(err), 128));
         item.setErrorMessage(err == null ? null : cut(errorText(err), 1024));
-        item.setExtra(cut(extra, 512));
+        item.setExtra(cut(buildExtraPayload(responseObj == null ? null : responseObj.getUsage(), extra), 512));
         item.setCreateTime(new Date());
         llmCallLogService.saveIgnoreError(item);
     }
 
+    private ChatCompletionResponse usageResponse(ChatCompletionResponse.Usage usage) {
+        if (usage == null) {
+            return null;
+        }
+        ChatCompletionResponse response = new ChatCompletionResponse();
+        response.setUsage(usage);
+        return response;
+    }
+
+    private String buildExtraPayload(ChatCompletionResponse.Usage usage, String extra) {
+        if (usage == null && isBlank(extra)) {
+            return null;
+        }
+        HashMap<String, Object> payload = new HashMap<>();
+        if (usage != null) {
+            if (usage.getPromptTokens() != null) {
+                payload.put("promptTokens", usage.getPromptTokens());
+            }
+            if (usage.getCompletionTokens() != null) {
+                payload.put("completionTokens", usage.getCompletionTokens());
+            }
+            if (usage.getTotalTokens() != null) {
+                payload.put("totalTokens", usage.getTotalTokens());
+            }
+        }
+        if (!isBlank(extra)) {
+            payload.put("note", extra);
+        }
+        return payload.isEmpty() ? null : JSON.toJSONString(payload);
+    }
+
     private static class CompletionCallResult {
         private final int statusCode;
         private final String payload;
diff --git a/src/main/java/com/zy/ai/service/LlmSpringAiClientService.java b/src/main/java/com/zy/ai/service/LlmSpringAiClientService.java
index 746aa20..0079565 100644
--- a/src/main/java/com/zy/ai/service/LlmSpringAiClientService.java
+++ b/src/main/java/com/zy/ai/service/LlmSpringAiClientService.java
@@ -22,6 +22,7 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.function.Consumer;
 
 @Service
 public class LlmSpringAiClientService {
@@ -43,19 +44,33 @@
                 legacy);
     }
 
-    public Flux<String> streamCompletion(String baseUrl, String apiKey, ChatCompletionRequest req) {
+    public Flux<String> streamCompletion(String baseUrl, String apiKey, ChatCompletionRequest req, Consumer<ChatCompletionResponse.Usage> usageConsumer) {
         OpenAiApi api = buildOpenAiApi(baseUrl, apiKey);
         OpenAiApi.ChatCompletionRequest springReq = buildSpringAiRequest(req, true);
         return api.chatCompletionStream(springReq)
-                .flatMapIterable(chunk -> chunk == null || chunk.choices() == null
-                        ? List.<OpenAiApi.ChatCompletionChunk.ChunkChoice>of()
-                        : chunk.choices())
-                .map(OpenAiApi.ChatCompletionChunk.ChunkChoice::delta)
-                .filter(delta -> delta != null)
-                .handle((delta, sink) -> {
-                    String text = extractSpringAiContent(delta);
-                    if (text != null && !text.isEmpty()) {
-                        sink.next(text);
+                .handle((chunk, sink) -> {
+                    if (chunk == null) {
+                        return;
+                    }
+                    if (chunk.usage() != null && usageConsumer != null) {
+                        ChatCompletionResponse.Usage usage = new ChatCompletionResponse.Usage();
+                        usage.setPromptTokens(chunk.usage().promptTokens());
+                        usage.setCompletionTokens(chunk.usage().completionTokens());
+                        usage.setTotalTokens(chunk.usage().totalTokens());
+                        usageConsumer.accept(usage);
+                    }
+                    List<OpenAiApi.ChatCompletionChunk.ChunkChoice> choices = chunk.choices();
+                    if (choices == null || choices.isEmpty()) {
+                        return;
+                    }
+                    for (OpenAiApi.ChatCompletionChunk.ChunkChoice choice : choices) {
+                        if (choice == null || choice.delta() == null) {
+                            continue;
+                        }
+                        String text = extractSpringAiContent(choice.delta());
+                        if (text != null && !text.isEmpty()) {
+                            sink.next(text);
+                        }
                     }
                 });
     }
diff --git a/src/main/java/com/zy/ai/service/WcsDiagnosisService.java b/src/main/java/com/zy/ai/service/WcsDiagnosisService.java
index 757e877..3f65ed0 100644
--- a/src/main/java/com/zy/ai/service/WcsDiagnosisService.java
+++ b/src/main/java/com/zy/ai/service/WcsDiagnosisService.java
@@ -8,10 +8,7 @@
 import com.zy.ai.entity.WcsDiagnosisRequest;
 import com.zy.ai.enums.AiPromptScene;
 import com.zy.ai.mcp.service.SpringAiMcpToolManager;
-import com.zy.ai.service.AiPromptTemplateService;
 import com.zy.ai.utils.AiUtils;
-import com.zy.common.utils.RedisUtil;
-import com.zy.core.enums.RedisKeyType;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
 import lombok.RequiredArgsConstructor;
@@ -27,18 +24,16 @@
 @Slf4j
 public class WcsDiagnosisService {
 
-    private static final long CHAT_TTL_SECONDS = 7L * 24 * 3600;
-
     @Autowired
     private LlmChatService llmChatService;
-    @Autowired
-    private RedisUtil redisUtil;
     @Autowired
     private AiUtils aiUtils;
     @Autowired
     private SpringAiMcpToolManager mcpToolManager;
     @Autowired
     private AiPromptTemplateService aiPromptTemplateService;
+    @Autowired
+    private AiChatStoreService aiChatStoreService;
 
     public void diagnoseStream(WcsDiagnosisRequest request, SseEmitter emitter) {
         List<ChatCompletionRequest.Message> messages = new ArrayList<>();
@@ -61,25 +56,13 @@
                           SseEmitter emitter) {
         List<ChatCompletionRequest.Message> messages = new ArrayList<>();
 
-        List<ChatCompletionRequest.Message> history = null;
-        String historyKey = null;
-        String metaKey = null;
         if (chatId != null && !chatId.isEmpty()) {
-            historyKey = RedisKeyType.AI_CHAT_HISTORY.key + chatId;
-            metaKey = RedisKeyType.AI_CHAT_META.key + chatId;
             if (reset) {
-                redisUtil.del(historyKey, metaKey);
+                aiChatStoreService.deleteChat(chatId);
             }
-            List<Object> stored = redisUtil.lGet(historyKey, 0, -1);
-            if (stored != null && !stored.isEmpty()) {
-                history = new ArrayList<>(stored.size());
-                for (Object o : stored) {
-                    ChatCompletionRequest.Message m = convertToMessage(o);
-                    if (m != null) history.add(m);
-                }
-                if (!history.isEmpty()) messages.addAll(history);
-            } else {
-                history = new ArrayList<>();
+            List<ChatCompletionRequest.Message> history = aiChatStoreService.getChatHistory(chatId);
+            if (history != null && !history.isEmpty()) {
+                messages.addAll(history);
             }
         }
 
@@ -92,68 +75,21 @@
 
         ChatCompletionRequest.Message mcpUser = new ChatCompletionRequest.Message();
         mcpUser.setRole("user");
-        mcpUser.setContent("銆愮敤鎴锋彁闂�慭n" + (prompt == null ? "" : prompt));
+        mcpUser.setContent(prompt == null ? "" : prompt);
 
         runMcpStreamingDiagnosis(messages, mcpSystem, mcpUser, promptTemplate, 0.3, 2048, emitter, finalChatId);
     }
 
     public List<Map<String, Object>> listChats() {
-        java.util.Set<String> keys = redisUtil.scanKeys(RedisKeyType.AI_CHAT_META.key, 1000);
-        List<Map<String, Object>> resp = new ArrayList<>();
-        if (keys != null) {
-            for (String key : keys) {
-                Map<Object, Object> m = redisUtil.hmget(key);
-                if (m != null && !m.isEmpty()) {
-                    java.util.HashMap<String, Object> item = new java.util.HashMap<>();
-                    for (Map.Entry<Object, Object> e : m.entrySet()) {
-                        item.put(String.valueOf(e.getKey()), e.getValue());
-                    }
-                    String chatId = String.valueOf(item.get("chatId"));
-                    String historyKey = RedisKeyType.AI_CHAT_HISTORY.key + chatId;
-                    item.put("size", redisUtil.lGetListSize(historyKey));
-                    resp.add(item);
-                }
-            }
-        }
-        return resp;
+        return aiChatStoreService.listChats();
     }
 
     public boolean deleteChat(String chatId) {
-        if (chatId == null || chatId.isEmpty()) return false;
-        String historyKey = RedisKeyType.AI_CHAT_HISTORY.key + chatId;
-        String metaKey = RedisKeyType.AI_CHAT_META.key + chatId;
-        redisUtil.del(historyKey, metaKey);
-        return true;
+        return aiChatStoreService.deleteChat(chatId);
     }
 
     public List<ChatCompletionRequest.Message> getChatHistory(String chatId) {
-        if (chatId == null || chatId.isEmpty()) return java.util.Collections.emptyList();
-        String historyKey = RedisKeyType.AI_CHAT_HISTORY.key + chatId;
-        List<Object> stored = redisUtil.lGet(historyKey, 0, -1);
-        List<ChatCompletionRequest.Message> result = new ArrayList<>();
-        if (stored != null) {
-            for (Object o : stored) {
-                ChatCompletionRequest.Message m = convertToMessage(o);
-                if (m != null) result.add(m);
-            }
-        }
-        return result;
-    }
-
-    private ChatCompletionRequest.Message convertToMessage(Object o) {
-        if (o instanceof ChatCompletionRequest.Message) {
-            return (ChatCompletionRequest.Message) o;
-        }
-        if (o instanceof Map) {
-            Map<?, ?> map = (Map<?, ?>) o;
-            ChatCompletionRequest.Message m = new ChatCompletionRequest.Message();
-            Object role = map.get("role");
-            Object content = map.get("content");
-            m.setRole(role == null ? null : String.valueOf(role));
-            m.setContent(content == null ? null : String.valueOf(content));
-            return m;
-        }
-        return null;
+        return aiChatStoreService.getChatHistory(chatId);
     }
 
     private String buildTitleFromPrompt(String prompt) {
@@ -178,6 +114,7 @@
             if (tools.isEmpty()) {
                 throw new IllegalStateException("No MCP tools registered");
             }
+            AgentUsageStats usageStats = new AgentUsageStats();
 
             baseMessages.add(systemPrompt);
             baseMessages.add(userQuestion);
@@ -195,6 +132,7 @@
                 if (resp == null || resp.getChoices() == null || resp.getChoices().isEmpty() || resp.getChoices().get(0).getMessage() == null) {
                     throw new IllegalStateException("LLM returned empty response");
                 }
+                usageStats.add(resp.getUsage());
 
                 ChatCompletionRequest.Message assistant = resp.getChoices().get(0).getMessage();
                 messages.add(assistant);
@@ -256,45 +194,34 @@
                 } catch (Exception ignore) {}
             }, () -> {
                 try {
+                    emitTokenUsage(emitter, usageStats);
                     sse(emitter, "\\n\\n銆怉I銆戣繍琛屽凡鍋滄锛堟甯哥粨鏉燂級\\n\\n");
                     log.info("AI MCP diagnose stopped: final end");
                     emitter.complete();
 
                     if (chatId != null) {
-                        String historyKey = RedisKeyType.AI_CHAT_HISTORY.key + chatId;
-                        String metaKey = RedisKeyType.AI_CHAT_META.key + chatId;
-
                         ChatCompletionRequest.Message a = new ChatCompletionRequest.Message();
                         a.setRole("assistant");
                         a.setContent(assistantBuffer.toString());
-                        redisUtil.lSet(historyKey, userQuestion);
-                        redisUtil.lSet(historyKey, a);
-                        redisUtil.expire(historyKey, CHAT_TTL_SECONDS);
-                        Map<Object, Object> old = redisUtil.hmget(metaKey);
-                        Long createdAt = old != null && old.get("createdAt") != null ?
-                                (old.get("createdAt") instanceof Number ? ((Number) old.get("createdAt")).longValue() : Long.valueOf(String.valueOf(old.get("createdAt"))))
-                                : System.currentTimeMillis();
-                        Map<String, Object> meta = new java.util.HashMap<>();
-                        meta.put("chatId", chatId);
-                        meta.put("title", buildTitleFromPrompt(userQuestion.getContent()));
-                        if (promptTemplate != null) {
-                            meta.put("promptTemplateId", promptTemplate.getId());
-                            meta.put("promptSceneCode", promptTemplate.getSceneCode());
-                            meta.put("promptVersion", promptTemplate.getVersion());
-                            meta.put("promptName", promptTemplate.getName());
-                        }
-                        meta.put("createdAt", createdAt);
-                        meta.put("updatedAt", System.currentTimeMillis());
-                        redisUtil.hmset(metaKey, meta, CHAT_TTL_SECONDS);
+                        aiChatStoreService.saveConversation(chatId,
+                                buildTitleFromPrompt(userQuestion.getContent()),
+                                userQuestion,
+                                a,
+                                promptTemplate,
+                                usageStats.getPromptTokens(),
+                                usageStats.getCompletionTokens(),
+                                usageStats.getTotalTokens(),
+                                usageStats.getLlmCallCount());
                     }
                 } catch (Exception ignore) {}
             }, e -> {
                 try {
+                    emitTokenUsage(emitter, usageStats);
                     sse(emitter, "\\n\\n銆怉I銆戝垎鏋愬嚭閿欙紝杩愯宸插仠姝紙寮傚父锛塡\n\\n");
                     log.error("AI MCP diagnose stopped: stream error", e);
                     emitter.complete();
                 } catch (Exception ignore) {}
-            });
+            }, usageStats::add);
         } catch (Exception e) {
             try {
                 sse(emitter, "\\n\\n銆怉I銆戣繍琛屽凡鍋滄锛堝紓甯革級\\n\\n");
@@ -311,6 +238,28 @@
         } catch (Exception e) {
             log.warn("SSE send failed", e);
         }
+    }
+
+    private void emitTokenUsage(SseEmitter emitter, AgentUsageStats usageStats) {
+        if (emitter == null || usageStats == null || usageStats.getTotalTokens() <= 0) {
+            return;
+        }
+        try {
+            emitter.send(SseEmitter.event()
+                    .name("token_usage")
+                    .data(JSON.toJSONString(buildTokenUsagePayload(usageStats))));
+        } catch (Exception e) {
+            log.warn("SSE token usage send failed", e);
+        }
+    }
+
+    private Map<String, Object> buildTokenUsagePayload(AgentUsageStats usageStats) {
+        java.util.LinkedHashMap<String, Object> payload = new java.util.LinkedHashMap<>();
+        payload.put("promptTokens", usageStats.getPromptTokens());
+        payload.put("completionTokens", usageStats.getCompletionTokens());
+        payload.put("totalTokens", usageStats.getTotalTokens());
+        payload.put("llmCallCount", usageStats.getLlmCallCount());
+        return payload;
     }
 
     private void sendLargeText(SseEmitter emitter, String text) {
@@ -429,6 +378,39 @@
         }
     }
 
+    private static class AgentUsageStats {
+        private long promptTokens;
+        private long completionTokens;
+        private long totalTokens;
+        private int llmCallCount;
+
+        void add(ChatCompletionResponse.Usage usage) {
+            if (usage == null) {
+                return;
+            }
+            promptTokens += usage.getPromptTokens() == null ? 0L : usage.getPromptTokens();
+            completionTokens += usage.getCompletionTokens() == null ? 0L : usage.getCompletionTokens();
+            totalTokens += usage.getTotalTokens() == null ? 0L : usage.getTotalTokens();
+            llmCallCount++;
+        }
+
+        long getPromptTokens() {
+            return promptTokens;
+        }
+
+        long getCompletionTokens() {
+            return completionTokens;
+        }
+
+        long getTotalTokens() {
+            return totalTokens;
+        }
+
+        int getLlmCallCount() {
+            return llmCallCount;
+        }
+    }
+
     private boolean isConclusionText(String content) {
         if (content == null) return false;
         String c = content;
diff --git a/src/main/java/com/zy/ai/service/impl/AiChatStoreServiceImpl.java b/src/main/java/com/zy/ai/service/impl/AiChatStoreServiceImpl.java
new file mode 100644
index 0000000..8b9c13d
--- /dev/null
+++ b/src/main/java/com/zy/ai/service/impl/AiChatStoreServiceImpl.java
@@ -0,0 +1,196 @@
+package com.zy.ai.service.impl;
+
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.zy.ai.entity.AiChatMessage;
import com.zy.ai.entity.AiChatSession;
import com.zy.ai.entity.AiPromptTemplate;
import com.zy.ai.entity.ChatCompletionRequest;
import com.zy.ai.mapper.AiChatMessageMapper;
import com.zy.ai.mapper.AiChatSessionMapper;
import com.zy.ai.service.AiChatStoreService;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
+
+@Service
+@RequiredArgsConstructor
+public class AiChatStoreServiceImpl implements AiChatStoreService {
+
+    private final AiChatSessionMapper aiChatSessionMapper;
+    private final AiChatMessageMapper aiChatMessageMapper;
+
+    @Override
+    public List<Map<String, Object>> listChats() {
+        List<AiChatSession> sessions = aiChatSessionMapper.selectList(new QueryWrapper<AiChatSession>()
+                .orderByDesc("update_time")
+                .orderByDesc("id"));
+        List<Map<String, Object>> result = new ArrayList<>();
+        for (AiChatSession session : sessions) {
+            if (session == null) {
+                continue;
+            }
+            LinkedHashMap<String, Object> item = new LinkedHashMap<>();
+            item.put("chatId", session.getChatId());
+            item.put("title", session.getTitle());
+            item.put("size", session.getMessageCount());
+            item.put("promptTemplateId", session.getPromptTemplateId());
+            item.put("promptSceneCode", session.getPromptSceneCode());
+            item.put("promptVersion", session.getPromptVersion());
+            item.put("promptName", session.getPromptName());
+            item.put("lastPromptTokens", session.getLastPromptTokens());
+            item.put("lastCompletionTokens", session.getLastCompletionTokens());
+            item.put("lastTotalTokens", session.getLastTotalTokens());
+            item.put("lastLlmCallCount", session.getLastLlmCallCount());
+            item.put("sumPromptTokens", session.getSumPromptTokens());
+            item.put("sumCompletionTokens", session.getSumCompletionTokens());
+            item.put("sumTotalTokens", session.getSumTotalTokens());
+            item.put("askCount", session.getAskCount());
+            item.put("createdAt", toEpochMilli(session.getCreateTime()));
+            item.put("updatedAt", toEpochMilli(session.getUpdateTime()));
+            item.put("lastTokenUpdatedAt", toEpochMilli(session.getLastTokenUpdatedAt()));
+            result.add(item);
+        }
+        return result;
+    }
+
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public boolean deleteChat(String chatId) {
+        if (isBlank(chatId)) {
+            return false;
+        }
+        aiChatMessageMapper.delete(new QueryWrapper<AiChatMessage>().eq("chat_id", chatId));
+        aiChatSessionMapper.delete(new QueryWrapper<AiChatSession>().eq("chat_id", chatId));
+        return true;
+    }
+
+    @Override
+    public List<ChatCompletionRequest.Message> getChatHistory(String chatId) {
+        if (isBlank(chatId)) {
+            return java.util.Collections.emptyList();
+        }
+        List<AiChatMessage> rows = aiChatMessageMapper.selectList(new QueryWrapper<AiChatMessage>()
+                .eq("chat_id", chatId)
+                .orderByAsc("seq_no")
+                .orderByAsc("id"));
+        List<ChatCompletionRequest.Message> result = new ArrayList<>(rows.size());
+        for (AiChatMessage row : rows) {
+            if (row == null) {
+                continue;
+            }
+            ChatCompletionRequest.Message message = new ChatCompletionRequest.Message();
+            message.setRole(row.getRole());
+            message.setContent(row.getContent());
+            result.add(message);
+        }
+        return result;
+    }
+
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public void saveConversation(String chatId,
+                                 String title,
+                                 ChatCompletionRequest.Message userMessage,
+                                 ChatCompletionRequest.Message assistantMessage,
+                                 AiPromptTemplate promptTemplate,
+                                 long promptTokens,
+                                 long completionTokens,
+                                 long totalTokens,
+                                 int llmCallCount) {
+        if (isBlank(chatId) || userMessage == null || assistantMessage == null) {
+            return;
+        }
+        synchronized (("ai_chat_store_" + chatId).intern()) {
+            AiChatSession session = aiChatSessionMapper.selectOne(new QueryWrapper<AiChatSession>()
+                    .eq("chat_id", chatId)
+                    .last("limit 1"));
+            Date now = new Date();
+            int nextSeq = 1;
+            if (session == null) {
+                session = new AiChatSession();
+                session.setChatId(chatId);
+                session.setCreateTime(now);
+                session.setMessageCount(0);
+                session.setSumPromptTokens(0L);
+                session.setSumCompletionTokens(0L);
+                session.setSumTotalTokens(0L);
+                session.setAskCount(0L);
+            } else {
+                Integer maxSeq = maxSeqNo(chatId);
+                nextSeq = maxSeq == null ? 1 : (maxSeq + 1);
+            }
+
+            session.setTitle(cut(title, 255));
+            if (promptTemplate != null) {
+                session.setPromptTemplateId(promptTemplate.getId());
+                session.setPromptSceneCode(cut(promptTemplate.getSceneCode(), 64));
+                session.setPromptVersion(promptTemplate.getVersion());
+                session.setPromptName(cut(promptTemplate.getName(), 255));
+            } else {
+                session.setPromptTemplateId(null);
+                session.setPromptSceneCode(null);
+                session.setPromptVersion(null);
+                session.setPromptName(null);
+            }
+            session.setLastPromptTokens(promptTokens);
+            session.setLastCompletionTokens(completionTokens);
+            session.setLastTotalTokens(totalTokens);
+            session.setLastLlmCallCount(llmCallCount);
+            session.setLastTokenUpdatedAt(now);
+            session.setMessageCount((session.getMessageCount() == null ? 0 : session.getMessageCount()) + 2);
+            session.setSumPromptTokens((session.getSumPromptTokens() == null ? 0L : session.getSumPromptTokens()) + promptTokens);
+            session.setSumCompletionTokens((session.getSumCompletionTokens() == null ? 0L : session.getSumCompletionTokens()) + completionTokens);
+            session.setSumTotalTokens((session.getSumTotalTokens() == null ? 0L : session.getSumTotalTokens()) + totalTokens);
+            session.setAskCount((session.getAskCount() == null ? 0L : session.getAskCount()) + 1);
+
+            if (session.getId() == null) {
+                aiChatSessionMapper.insert(session);
+            } else {
+                aiChatSessionMapper.updateById(session);
+            }
+
+            insertMessage(chatId, nextSeq, userMessage, now);
+            insertMessage(chatId, nextSeq + 1, assistantMessage, now);
+        }
+    }
+
+    private void insertMessage(String chatId, int seqNo, ChatCompletionRequest.Message source, Date now) {
+        AiChatMessage row = new AiChatMessage();
+        row.setChatId(chatId);
+        row.setSeqNo(seqNo);
+        row.setRole(cut(source.getRole(), 32));
+        row.setContent(source.getContent());
+        row.setCreateTime(now);
+        aiChatMessageMapper.insert(row);
+    }
+
+    private Integer maxSeqNo(String chatId) {
+        AiChatMessage last = aiChatMessageMapper.selectOne(new QueryWrapper<AiChatMessage>()
+                .eq("chat_id", chatId)
+                .orderByDesc("seq_no")
+                .orderByDesc("id")
+                .last("limit 1"));
+        return last == null ? null : last.getSeqNo();
+    }
+
+    private long toEpochMilli(Date date) {
+        return date == null ? 0L : date.getTime();
+    }
+
+    private boolean isBlank(String text) {
+        return text == null || text.trim().isEmpty();
+    }
+
+    private String cut(String text, int maxLen) {
+        if (text == null) {
+            return null;
+        }
+        return text.length() > maxLen ? text.substring(0, maxLen) : text;
+    }
+}
diff --git a/src/main/java/com/zy/core/enums/RedisKeyType.java b/src/main/java/com/zy/core/enums/RedisKeyType.java
index caf05c3..2cda986 100644
--- a/src/main/java/com/zy/core/enums/RedisKeyType.java
+++ b/src/main/java/com/zy/core/enums/RedisKeyType.java
@@ -61,8 +61,6 @@
     CURRENT_CIRCLE_TASK_CRN_NO("current_circle_task_crn_no_"),
     ASYNC_WMS_IN_TASK_REQUEST("async_wms_in_task_request_"),
     ASYNC_WMS_IN_TASK_RESPONSE("async_wms_in_task_response_"),
-    AI_CHAT_HISTORY("ai_chat_history_"),
-    AI_CHAT_META("ai_chat_meta_"),
     MAIN_PROCESS_PSEUDOCODE("main_process_pseudocode"),
     PLANNER_SCHEDULE("planner_schedule_"),
     ;
diff --git a/src/main/resources/sql/20260312_create_sys_ai_chat_storage.sql b/src/main/resources/sql/20260312_create_sys_ai_chat_storage.sql
new file mode 100644
index 0000000..9804e29
--- /dev/null
+++ b/src/main/resources/sql/20260312_create_sys_ai_chat_storage.sql
@@ -0,0 +1,35 @@
-- AI chat session header: one row per conversation (chat_id is unique).
-- Carries prompt-template provenance plus last-run and cumulative token
-- counters maintained by AiChatStoreServiceImpl.saveConversation.
-- update_time is indexed because listChats() orders by it descending.
CREATE TABLE IF NOT EXISTS `sys_ai_chat_session` (
  `id` BIGINT NOT NULL AUTO_INCREMENT COMMENT '涓婚敭',
  `chat_id` VARCHAR(64) NOT NULL COMMENT '浼氳瘽ID',
  `title` VARCHAR(255) DEFAULT NULL COMMENT '浼氳瘽鏍囬',
  `prompt_template_id` BIGINT DEFAULT NULL COMMENT 'Prompt妯℃澘ID',
  `prompt_scene_code` VARCHAR(64) DEFAULT NULL COMMENT 'Prompt鍦烘櫙',
  `prompt_version` INT DEFAULT NULL COMMENT 'Prompt鐗堟湰',
  `prompt_name` VARCHAR(255) DEFAULT NULL COMMENT 'Prompt鍚嶇О',
  `message_count` INT NOT NULL DEFAULT 0 COMMENT '娑堟伅鏁�',
  `last_prompt_tokens` BIGINT NOT NULL DEFAULT 0 COMMENT '鏈�杩戜竴娆¤緭鍏okens',
  `last_completion_tokens` BIGINT NOT NULL DEFAULT 0 COMMENT '鏈�杩戜竴娆¤緭鍑簍okens',
  `last_total_tokens` BIGINT NOT NULL DEFAULT 0 COMMENT '鏈�杩戜竴娆℃�籺okens',
  `last_llm_call_count` INT NOT NULL DEFAULT 0 COMMENT '鏈�杩戜竴娆℃ā鍨嬭皟鐢ㄨ疆娆�',
  `last_token_updated_at` DATETIME DEFAULT NULL COMMENT '鏈�杩戜竴娆okens鏇存柊鏃堕棿',
  `sum_prompt_tokens` BIGINT NOT NULL DEFAULT 0 COMMENT '绱杈撳叆tokens',
  `sum_completion_tokens` BIGINT NOT NULL DEFAULT 0 COMMENT '绱杈撳嚭tokens',
  `sum_total_tokens` BIGINT NOT NULL DEFAULT 0 COMMENT '绱鎬籺okens',
  `ask_count` BIGINT NOT NULL DEFAULT 0 COMMENT '绱鎻愰棶娆℃暟',
  `create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '鍒涘缓鏃堕棿',
  `update_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '鏇存柊鏃堕棿',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uk_sys_ai_chat_session_chat_id` (`chat_id`),
  KEY `idx_sys_ai_chat_session_update_time` (`update_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='AI鑱婂ぉ浼氳瘽琛�';
+
-- AI chat message rows, read back in (chat_id, seq_no) order by
-- getChatHistory(); user and assistant messages are inserted in pairs with
-- consecutive seq_no values by AiChatStoreServiceImpl.saveConversation.
CREATE TABLE IF NOT EXISTS `sys_ai_chat_message` (
  `id` BIGINT NOT NULL AUTO_INCREMENT COMMENT '涓婚敭',
  `chat_id` VARCHAR(64) NOT NULL COMMENT '浼氳瘽ID',
  `seq_no` INT NOT NULL COMMENT '椤哄簭鍙�',
  `role` VARCHAR(32) NOT NULL COMMENT '瑙掕壊:user/assistant',
  `content` LONGTEXT COMMENT '娑堟伅鍐呭',
  `create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '鍒涘缓鏃堕棿',
  PRIMARY KEY (`id`),
  KEY `idx_sys_ai_chat_message_chat_seq` (`chat_id`, `seq_no`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='AI鑱婂ぉ娑堟伅琛�';
diff --git a/src/main/webapp/views/ai/diagnosis.html b/src/main/webapp/views/ai/diagnosis.html
index 55bcd08..c613791 100644
--- a/src/main/webapp/views/ai/diagnosis.html
+++ b/src/main/webapp/views/ai/diagnosis.html
@@ -616,7 +616,7 @@
         <footer class="composer-panel">
           <div class="composer-head">
             <div><strong>鍚� AI 鍔╂墜鎻愰棶</strong></div>
-            <div>{{ currentChatId ? '浼氳瘽宸茬粦瀹�' : '涓存椂浼氳瘽' }}</div>
+            <div>{{ currentRunTokenSummary || currentChatTokenSummary || (currentChatId ? '浼氳瘽宸茬粦瀹�' : '涓存椂浼氳瘽') }}</div>
           </div>
           <el-input
             v-model="userInput"
@@ -679,6 +679,7 @@
           lastRenderTs: 0,
           renderIntervalMs: 120,
           stepChars: 6,
+          runTokenUsage: null,
           userInput: '',
           autoScrollThreshold: 80,
           chats: [],
@@ -731,7 +732,15 @@
           var current = this.findChat(this.currentChatId);
           if (!current && this.resetting) return '鏂板缓浼氳瘽锛岀瓑寰呴鏉℃秷鎭�';
           if (!current) return '浼氳瘽 ' + this.currentChatId;
-          return this.chatLabel(current);
+          var tokenText = this.tokenSummaryText(current);
+          return tokenText ? (this.chatLabel(current) + ' 路 ' + tokenText) : this.chatLabel(current);
+        },
        currentChatTokenSummary: function() {
          // Token caption for the currently selected stored chat; '' when the
          // chat id is not in the loaded chat list (e.g. a brand-new session).
          var current = this.findChat(this.currentChatId);
          return current ? this.tokenSummaryText(current) : '';
        },
        currentRunTokenSummary: function() {
          // Caption for the in-flight run's "token_usage" SSE payload
          // (runTokenUsage); '' while no run has reported usage yet.
          return this.runTokenUsage ? this.tokenSummaryText(this.runTokenUsage, '鏈') : '';
        },
         inlinePrompts: function() {
           return this.promptPresets.slice(1);
@@ -801,7 +810,22 @@
         },
         chatOptionLabel: function(chat) {
           if (!chat) return '鏈懡鍚嶄細璇�';
-          return this.chatLabel(chat) + ' 路 ' + (chat.size || 0) + ' 鏉� 路 ' + this.chatUpdatedAt(chat);
+          var suffix = this.tokenSummaryText(chat);
+          return this.chatLabel(chat) + ' 路 ' + (chat.size || 0) + ' 鏉� 路 ' + this.chatUpdatedAt(chat) + (suffix ? (' 路 ' + suffix) : '');
+        },
+        numericValue: function(value) {
+          if (value === null || value === undefined || value === '') return 0;
+          var num = Number(value);
+          return isNaN(num) ? 0 : num;
+        },
        tokenSummaryText: function(source, prefix) {
          // Builds a short token-usage caption from either a live run payload
          // (promptTokens/completionTokens/totalTokens) or a stored session
          // row (last*Tokens fields). Returns '' when nothing was counted.
          if (!source) return '';
          var total = this.numericValue(source.totalTokens != null ? source.totalTokens : source.lastTotalTokens);
          if (!total) return '';
          var prompt = this.numericValue(source.promptTokens != null ? source.promptTokens : source.lastPromptTokens);
          var completion = this.numericValue(source.completionTokens != null ? source.completionTokens : source.lastCompletionTokens);
          // Default label is the stored-session wording when no prefix given.
          var label = prefix || '涓婃';
          return label + ' tokens ' + total + '锛堣緭' + prompt + ' / 鍑�' + completion + '锛�';
        },
         chatUpdatedAt: function(chat) {
           if (!chat || !chat.updatedAt) return '鍒氬垰鍒涘缓';
@@ -825,6 +849,7 @@
         openChat: function(chatId) {
           if (!chatId || this.streaming) return;
           this.currentChatId = chatId;
+          this.runTokenUsage = null;
           this.switchChat();
         },
         switchChat: function() {
@@ -869,6 +894,7 @@
           if (this.streaming) return;
           this.currentChatId = Date.now() + '_' + Math.random().toString(36).substr(2, 8);
           this.resetting = true;
+          this.runTokenUsage = null;
           this.clear();
         },
         deleteChat: function() {
@@ -932,6 +958,7 @@
           if (!message) return;
           this.loading = true;
           this.streaming = true;
+          this.runTokenUsage = null;
           this.messages.push({ role: 'user', text: message, ts: this.nowStr() });
           this.appendAssistantPlaceholder();
           this.scrollToBottom(true);
@@ -945,6 +972,12 @@
           this.source.onopen = function() {
             self.loading = false;
           };
+          this.source.addEventListener('token_usage', function(e) {
+            if (!e || !e.data) return;
+            try {
+              self.runTokenUsage = JSON.parse(e.data);
+            } catch (ignore) {}
+          });
           this.source.onmessage = function(e) {
             if (!e || !e.data) return;
             var chunk = (e.data || '').replace(/\\n/g, '\n');
@@ -966,6 +999,7 @@
           this.clear();
           this.loading = true;
           this.streaming = true;
+          this.runTokenUsage = null;
           this.appendAssistantPlaceholder();
           this.scrollToBottom(true);
 
@@ -974,6 +1008,12 @@
           this.source.onopen = function() {
             self.loading = false;
           };
+          this.source.addEventListener('token_usage', function(e) {
+            if (!e || !e.data) return;
+            try {
+              self.runTokenUsage = JSON.parse(e.data);
+            } catch (ignore) {}
+          });
           this.source.onmessage = function(e) {
             if (!e || !e.data) return;
             var chunk = (e.data || '').replace(/\\n/g, '\n');
diff --git a/src/main/webapp/views/ai/llm_config.html b/src/main/webapp/views/ai/llm_config.html
index 9ceff36..ba79721 100644
--- a/src/main/webapp/views/ai/llm_config.html
+++ b/src/main/webapp/views/ai/llm_config.html
@@ -436,6 +436,12 @@
       </el-table-column>
       <el-table-column prop="httpStatus" label="鐘舵�佺爜" width="90"></el-table-column>
       <el-table-column prop="latencyMs" label="鑰楁椂(ms)" width="95"></el-table-column>
+      <el-table-column label="Tokens" width="140">
+        <template slot-scope="scope">
+          <div>{{ logTotalTokens(scope.row) }}</div>
+          <div style="color:#909399;font-size:12px;">杈搟{ logPromptTokens(scope.row) }} / 鍑簕{ logCompletionTokens(scope.row) }}</div>
+        </template>
+      </el-table-column>
       <el-table-column prop="traceId" label="TraceId" width="230"></el-table-column>
       <el-table-column label="閿欒" min-width="220">
         <template slot-scope="scope">
@@ -830,6 +836,7 @@
           + '妯″瀷: ' + (row.model || '-') + '\n'
           + '鐘舵�佺爜: ' + (row.httpStatus != null ? row.httpStatus : '-') + '\n'
           + '鑰楁椂: ' + (row.latencyMs != null ? row.latencyMs : '-') + ' ms\n'
+          + 'Tokens: ' + this.logTotalTokens(row) + '锛堣緭' + this.logPromptTokens(row) + ' / 鍑�' + this.logCompletionTokens(row) + '锛塡n'
           + '缁撴灉: ' + (row.success === 1 ? '鎴愬姛' : '澶辫触') + '\n'
           + '閿欒: ' + (row.errorMessage || '-') + '\n\n'
           + '璇锋眰:\n' + (row.requestContent || '-') + '\n\n'
@@ -838,6 +845,27 @@
         this.logDetailText = text;
         this.logDetailVisible = true;
       },
+      parseLogExtra: function(row) {
+        if (!row || !row.extra) return {};
+        if (typeof row.extra === 'object') return row.extra;
+        try {
+          return JSON.parse(row.extra);
+        } catch (e) {
+          return {};
+        }
+      },
+      logPromptTokens: function(row) {
+        var extra = this.parseLogExtra(row);
+        return extra && extra.promptTokens != null ? extra.promptTokens : '-';
+      },
+      logCompletionTokens: function(row) {
+        var extra = this.parseLogExtra(row);
+        return extra && extra.completionTokens != null ? extra.completionTokens : '-';
+      },
+      logTotalTokens: function(row) {
+        var extra = this.parseLogExtra(row);
+        return extra && extra.totalTokens != null ? extra.totalTokens : '-';
+      },
       deleteLog: function(row) {
         var self = this;
         if (!row || !row.id) return;

--
Gitblit v1.9.1