Junjie
2 天以前 63b01db83d9aad8a15276b4236a9a22e4aeef065
src/main/java/com/zy/ai/service/LlmChatService.java
@@ -5,6 +5,10 @@
import com.zy.ai.entity.ChatCompletionResponse;
import com.zy.ai.entity.LlmCallLog;
import com.zy.ai.entity.LlmRouteConfig;
import com.zy.ai.gateway.AiGatewayService;
import com.zy.ai.gateway.adapter.openai.OpenAiChatCompletionsMapper;
import com.zy.ai.gateway.model.AiRequest;
import com.zy.ai.gateway.model.AiResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
@@ -35,6 +39,9 @@
    // Collaborators, presumably constructor-injected via Lombok
    // @RequiredArgsConstructor (import visible above; annotation outside this view).
    private final LlmRoutingService llmRoutingService;
    private final LlmCallLogService llmCallLogService;
    private final LlmSpringAiClientService llmSpringAiClientService;
    // Unified gateway entry point used by completeOrThrow.
    private final AiGatewayService aiGatewayService;
    // Maps OpenAI-style request/response objects to/from gateway models.
    private final OpenAiChatCompletionsMapper openAiChatCompletionsMapper;
    // Accumulates token counters on successful calls (see recordCall tail).
    private final AiTokenUsageService aiTokenUsageService;
    // Fallback base URL from configuration; defaults to empty string.
    @Value("${llm.base-url:}")
    private String fallbackBaseUrl;
@@ -91,70 +98,40 @@
        return complete(req, tools != null && !tools.isEmpty() ? "chat_completion_tools" : "chat_completion");
    }
    public ChatCompletionResponse chatCompletionOrThrow(List<ChatCompletionRequest.Message> messages,
                                                        Double temperature,
                                                        Integer maxTokens,
                                                        List<Object> tools) {
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
        req.setStream(false);
        if (tools != null && !tools.isEmpty()) {
            req.setTools(tools);
            req.setTool_choice("auto");
        }
        return completeOrThrow(req, tools != null && !tools.isEmpty() ? "chat_completion_tools" : "chat_completion");
    }
    /**
     * Best-effort completion using the default {@code "completion"} scene tag.
     * Delegates to {@code complete(req, scene)}, which returns {@code null}
     * on any failure instead of throwing.
     */
    public ChatCompletionResponse complete(ChatCompletionRequest req) {
        return complete(req, "completion");
    }
    private ChatCompletionResponse complete(ChatCompletionRequest req, String scene) {
        String traceId = nextTraceId();
        List<ResolvedRoute> routes = resolveRoutes();
        if (routes.isEmpty()) {
            log.error("调用 LLM 失败: 未配置可用 LLM 路由");
            recordCall(traceId, scene, false, 1, null, false, null, 0L, req, null, null, "none",
                    new RuntimeException("未配置可用 LLM 路由"), "no_route");
    public ChatCompletionResponse complete(ChatCompletionRequest req, String scene) {
        try {
            return completeOrThrow(req, scene);
        } catch (Throwable ex) {
            log.error("调用 LLM 失败, scene={}", scene, ex);
            return null;
        }
    }
        Throwable last = null;
        for (int i = 0; i < routes.size(); i++) {
            ResolvedRoute route = routes.get(i);
            boolean hasNext = i < routes.size() - 1;
            ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false);
            long start = System.currentTimeMillis();
            try {
                CompletionCallResult callResult = callCompletion(route, routeReq);
                ChatCompletionResponse resp = callResult.response;
                if (!isValidCompletion(resp)) {
                    RuntimeException ex = new RuntimeException("LLM 响应为空");
                    boolean canSwitch = shouldSwitch(route, false);
                    markFailure(route, ex, canSwitch);
                    recordCall(traceId, scene, false, i + 1, route, false, callResult.statusCode,
                            System.currentTimeMillis() - start, routeReq, resp, callResult.payload, "error", ex,
                            "invalid_completion");
                    if (hasNext && canSwitch) {
                        log.warn("LLM 切换到下一路由, current={}, reason={}", route.tag(), ex.getMessage());
                        continue;
                    }
                    log.error("调用 LLM 失败, route={}", route.tag(), ex);
                    last = ex;
                    break;
                }
                markSuccess(route);
                recordCall(traceId, scene, false, i + 1, route, true, callResult.statusCode,
                        System.currentTimeMillis() - start, routeReq, resp, buildResponseText(resp, callResult.payload),
                        "none", null, null);
                return resp;
            } catch (Throwable ex) {
                last = ex;
                boolean quota = isQuotaExhausted(ex);
                boolean canSwitch = shouldSwitch(route, quota);
                markFailure(route, ex, canSwitch);
                recordCall(traceId, scene, false, i + 1, route, false, statusCodeOf(ex),
                        System.currentTimeMillis() - start, routeReq, null, responseBodyOf(ex),
                        quota ? "quota" : "error", ex, null);
                if (hasNext && canSwitch) {
                    log.warn("LLM 切换到下一路由, current={}, reason={}", route.tag(), errorText(ex));
                    continue;
                }
                log.error("调用 LLM 失败, route={}", route.tag(), ex);
                break;
            }
        }
        if (last != null) {
            log.error("调用 LLM 全部路由失败: {}", errorText(last));
        }
        return null;
    public ChatCompletionResponse completeOrThrow(ChatCompletionRequest req, String scene) {
        AiRequest aiRequest = openAiChatCompletionsMapper.toAiRequest(req);
        aiRequest.setScene(scene);
        AiResponse response = aiGatewayService.generate(aiRequest);
        return openAiChatCompletionsMapper.toChatResponse(response);
    }
    public void chatStream(List<ChatCompletionRequest.Message> messages,
@@ -304,16 +281,6 @@
                .doOnError(ex -> log.error("调用 Spring AI 流式失败, route={}", route.tag(), ex));
    }
    /**
     * Executes one completion call against the given route. Currently always
     * backed by the Spring AI client implementation.
     */
    private CompletionCallResult callCompletion(ResolvedRoute route, ChatCompletionRequest req) {
        return callCompletionWithSpringAi(route, req);
    }
    private CompletionCallResult callCompletionWithSpringAi(ResolvedRoute route, ChatCompletionRequest req) {
        LlmSpringAiClientService.CompletionCallResult result =
                llmSpringAiClientService.callCompletion(route.baseUrl, route.apiKey, req);
        return new CompletionCallResult(result.getStatusCode(), result.getPayload(), result.getResponse());
    }
    private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) {
        req.setModel(route.model);
        req.setStream(stream);
@@ -338,20 +305,6 @@
        req.setTool_choice(src.getTool_choice());
        req.setThinking(src.getThinking());
        return req;
    }
    private boolean isValidCompletion(ChatCompletionResponse response) {
        if (response == null || response.getChoices() == null || response.getChoices().isEmpty()) {
            return false;
        }
        ChatCompletionRequest.Message message = response.getChoices().get(0).getMessage();
        if (message == null) {
            return false;
        }
        if (!isBlank(message.getContent())) {
            return true;
        }
        return message.getTool_calls() != null && !message.getTool_calls().isEmpty();
    }
    private boolean shouldSwitch(ResolvedRoute route, boolean quota) {
@@ -471,20 +424,6 @@
        return cut(llmSpringAiClientService.responseBodyOf(ex, LOG_TEXT_LIMIT), LOG_TEXT_LIMIT);
    }
    private String buildResponseText(ChatCompletionResponse resp, String fallbackPayload) {
        if (resp != null && resp.getChoices() != null && !resp.getChoices().isEmpty()
                && resp.getChoices().get(0) != null && resp.getChoices().get(0).getMessage() != null) {
            ChatCompletionRequest.Message m = resp.getChoices().get(0).getMessage();
            if (!isBlank(m.getContent())) {
                return cut(m.getContent(), LOG_TEXT_LIMIT);
            }
            if (m.getTool_calls() != null && !m.getTool_calls().isEmpty()) {
                return cut(JSON.toJSONString(m), LOG_TEXT_LIMIT);
            }
        }
        return cut(fallbackPayload, LOG_TEXT_LIMIT);
    }
    private String safeName(Throwable ex) {
        return ex == null ? null : ex.getClass().getSimpleName();
    }
@@ -531,6 +470,16 @@
        item.setExtra(cut(buildExtraPayload(responseObj == null ? null : responseObj.getUsage(), extra), 512));
        item.setCreateTime(new Date());
        llmCallLogService.saveIgnoreError(item);
        // 累加 token 到独立存储
        if (success && responseObj != null && responseObj.getUsage() != null) {
            ChatCompletionResponse.Usage usage = responseObj.getUsage();
            aiTokenUsageService.incrementTokens(
                    usage.getPromptTokens() == null ? 0 : usage.getPromptTokens(),
                    usage.getCompletionTokens() == null ? 0 : usage.getCompletionTokens(),
                    usage.getTotalTokens() == null ? 0 : usage.getTotalTokens(),
                    1);
        }
    }
    private ChatCompletionResponse usageResponse(ChatCompletionResponse.Usage usage) {
@@ -562,18 +511,6 @@
            payload.put("note", extra);
        }
        return payload.isEmpty() ? null : JSON.toJSONString(payload);
    }
    /**
     * Immutable holder for the outcome of one completion call: the HTTP-level
     * status code, the raw payload text, and the parsed response object.
     * Fields are read directly by sibling methods in this service.
     */
    private static class CompletionCallResult {
        private final int statusCode;                  // provider status code
        private final String payload;                  // raw body text (may be null)
        private final ChatCompletionResponse response; // parsed response (may be null)
        private CompletionCallResult(int statusCode, String payload, ChatCompletionResponse response) {
            this.statusCode = statusCode;
            this.payload = payload;
            this.response = response;
        }
    }
    private static class ResolvedRoute {