import com.zy.ai.entity.ChatCompletionResponse;
import com.zy.ai.entity.LlmCallLog;
import com.zy.ai.entity.LlmRouteConfig;
import com.zy.ai.gateway.AiGatewayService;
import com.zy.ai.gateway.adapter.openai.OpenAiChatCompletionsMapper;
import com.zy.ai.gateway.model.AiRequest;
import com.zy.ai.gateway.model.AiResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;

// Standard-library imports used by the methods below; the original import block is
// only partially visible in this excerpt.
import java.util.Date;
import java.util.List;

    private final LlmRoutingService llmRoutingService;
    private final LlmCallLogService llmCallLogService;
    private final LlmSpringAiClientService llmSpringAiClientService;
    private final AiGatewayService aiGatewayService;
    private final OpenAiChatCompletionsMapper openAiChatCompletionsMapper;
    private final AiTokenUsageService aiTokenUsageService;

    @Value("${llm.base-url:}")
    private String fallbackBaseUrl;

        return complete(req, tools != null && !tools.isEmpty() ? "chat_completion_tools" : "chat_completion");
    }

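    /**
     * Builds a chat-completion request from the given messages, applying defaults
     * (temperature 0.3, max_tokens 1024, non-streaming) and enabling the supplied tools
     * with tool_choice "auto", then delegates to completeOrThrow so failures propagate
     * to the caller instead of being swallowed.
     */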
    public ChatCompletionResponse chatCompletionOrThrow(List<ChatCompletionRequest.Message> messages,
                                                        Double temperature,
                                                        Integer maxTokens,
                                                        List<Object> tools) {
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
        req.setStream(false);
        if (tools != null && !tools.isEmpty()) {
            req.setTools(tools);
            req.setTool_choice("auto");
        }
        return completeOrThrow(req, tools != null && !tools.isEmpty() ? "chat_completion_tools" : "chat_completion");
    }

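    /** Convenience overload that runs the request under the default "completion" scene. */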
    public ChatCompletionResponse complete(ChatCompletionRequest req) {
        return complete(req, "completion");
    }

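    /**
     * Fault-tolerant completion: delegates to completeOrThrow and returns null instead of
     * throwing when the call fails, logging the error together with its scene.
     */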
    public ChatCompletionResponse complete(ChatCompletionRequest req, String scene) {
        try {
            return completeOrThrow(req, scene);
        } catch (Throwable ex) {
            log.error("LLM call failed, scene={}", scene, ex);
            return null;
        }
    }

    // Route-based completion with per-route failover. NOTE: the method name below is an
    // assumption; the original name was lost, and it cannot share the signature of
    // complete(ChatCompletionRequest, String) above.
    private ChatCompletionResponse completeViaRoutes(ChatCompletionRequest req, String scene) {
        String traceId = nextTraceId();
        List<ResolvedRoute> routes = resolveRoutes();
        if (routes.isEmpty()) {
            log.error("LLM call failed: no available LLM route is configured");
            recordCall(traceId, scene, false, 1, null, false, null, 0L, req, null, null, "none",
                    new RuntimeException("No available LLM route is configured"), "no_route");
            return null;
        }

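        // Try each resolved route in order: record every attempt, and on a failed or empty
        // completion either fail over to the next route (when switching is allowed) or stop
        // and fall through to the final error handling below.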
        Throwable last = null;
        for (int i = 0; i < routes.size(); i++) {
            ResolvedRoute route = routes.get(i);
            boolean hasNext = i < routes.size() - 1;
            ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false);
            long start = System.currentTimeMillis();
            try {
                CompletionCallResult callResult = callCompletion(route, routeReq);
                ChatCompletionResponse resp = callResult.response;
                if (!isValidCompletion(resp)) {
                    RuntimeException ex = new RuntimeException("LLM response is empty");
                    boolean canSwitch = shouldSwitch(route, false);
                    markFailure(route, ex, canSwitch);
                    recordCall(traceId, scene, false, i + 1, route, false, callResult.statusCode,
                            System.currentTimeMillis() - start, routeReq, resp, callResult.payload, "error", ex,
                            "invalid_completion");
                    if (hasNext && canSwitch) {
                        log.warn("Switching LLM to the next route, current={}, reason={}", route.tag(), ex.getMessage());
                        continue;
                    }
                    log.error("LLM call failed, route={}", route.tag(), ex);
                    last = ex;
                    break;
                }
                markSuccess(route);
                recordCall(traceId, scene, false, i + 1, route, true, callResult.statusCode,
                        System.currentTimeMillis() - start, routeReq, resp, buildResponseText(resp, callResult.payload),
                        "none", null, null);
                return resp;
            } catch (Throwable ex) {
                last = ex;
                boolean quota = isQuotaExhausted(ex);
                boolean canSwitch = shouldSwitch(route, quota);
                markFailure(route, ex, canSwitch);
                recordCall(traceId, scene, false, i + 1, route, false, statusCodeOf(ex),
                        System.currentTimeMillis() - start, routeReq, null, responseBodyOf(ex),
                        quota ? "quota" : "error", ex, null);
                if (hasNext && canSwitch) {
                    log.warn("Switching LLM to the next route, current={}, reason={}", route.tag(), errorText(ex));
                    continue;
                }
                log.error("LLM call failed, route={}", route.tag(), ex);
                break;
            }
        }

        if (last != null) {
            log.error("All LLM routes failed: {}", errorText(last));
        }
        return null;
    }

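    /**
     * Gateway-based completion: maps the OpenAI-style request to the internal AiRequest,
     * tags it with the scene, calls the AI gateway, and maps the result back. Any failure
     * propagates as an exception.
     */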
    public ChatCompletionResponse completeOrThrow(ChatCompletionRequest req, String scene) {
        AiRequest aiRequest = openAiChatCompletionsMapper.toAiRequest(req);
        aiRequest.setScene(scene);
        AiResponse response = aiGatewayService.generate(aiRequest);
        return openAiChatCompletionsMapper.toChatResponse(response);
    }

    public void chatStream(List<ChatCompletionRequest.Message> messages,

                .doOnError(ex -> log.error("Spring AI streaming call failed, route={}", route.tag(), ex));
    }

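    // Single-route completion call; currently always goes through the Spring AI client.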
    private CompletionCallResult callCompletion(ResolvedRoute route, ChatCompletionRequest req) {
        return callCompletionWithSpringAi(route, req);
    }

    private CompletionCallResult callCompletionWithSpringAi(ResolvedRoute route, ChatCompletionRequest req) {
        LlmSpringAiClientService.CompletionCallResult result =
                llmSpringAiClientService.callCompletion(route.baseUrl, route.apiKey, req);
        return new CompletionCallResult(result.getStatusCode(), result.getPayload(), result.getResponse());
    }

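    // Rewrites a cloned request for the resolved route: applies the route's model and the
    // stream flag, then copies tool_choice and thinking settings from the source request.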
    private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) {
        req.setModel(route.model);
        req.setStream(stream);

        req.setTool_choice(src.getTool_choice());
        req.setThinking(src.getThinking());
        return req;
    }

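    // A completion counts as valid when its first choice carries either non-blank content
    // or at least one tool call.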
    private boolean isValidCompletion(ChatCompletionResponse response) {
        if (response == null || response.getChoices() == null || response.getChoices().isEmpty()) {
            return false;
        }
        ChatCompletionRequest.Message message = response.getChoices().get(0).getMessage();
        if (message == null) {
            return false;
        }
        if (!isBlank(message.getContent())) {
            return true;
        }
        return message.getTool_calls() != null && !message.getTool_calls().isEmpty();
    }

    private boolean shouldSwitch(ResolvedRoute route, boolean quota) {

        return cut(llmSpringAiClientService.responseBodyOf(ex, LOG_TEXT_LIMIT), LOG_TEXT_LIMIT);
    }

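    // Response text for the call log: prefer the first choice's content, then the serialized
    // tool-call message, and finally the raw payload; everything is cut to LOG_TEXT_LIMIT.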
    private String buildResponseText(ChatCompletionResponse resp, String fallbackPayload) {
        if (resp != null && resp.getChoices() != null && !resp.getChoices().isEmpty()
                && resp.getChoices().get(0) != null && resp.getChoices().get(0).getMessage() != null) {
            ChatCompletionRequest.Message m = resp.getChoices().get(0).getMessage();
            if (!isBlank(m.getContent())) {
                return cut(m.getContent(), LOG_TEXT_LIMIT);
            }
            if (m.getTool_calls() != null && !m.getTool_calls().isEmpty()) {
                return cut(JSON.toJSONString(m), LOG_TEXT_LIMIT);
            }
        }
        return cut(fallbackPayload, LOG_TEXT_LIMIT);
    }

    private String safeName(Throwable ex) {
        return ex == null ? null : ex.getClass().getSimpleName();
    }

        item.setExtra(cut(buildExtraPayload(responseObj == null ? null : responseObj.getUsage(), extra), 512));
        item.setCreateTime(new Date());
        llmCallLogService.saveIgnoreError(item);

        // Accumulate token counts into the separate usage store
        if (success && responseObj != null && responseObj.getUsage() != null) {
            ChatCompletionResponse.Usage usage = responseObj.getUsage();
            aiTokenUsageService.incrementTokens(
                    usage.getPromptTokens() == null ? 0 : usage.getPromptTokens(),
                    usage.getCompletionTokens() == null ? 0 : usage.getCompletionTokens(),
                    usage.getTotalTokens() == null ? 0 : usage.getTotalTokens(),
                    1);
        }
    }

    private ChatCompletionResponse usageResponse(ChatCompletionResponse.Usage usage) {

            payload.put("note", extra);
        }
        return payload.isEmpty() ? null : JSON.toJSONString(payload);
    }

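    // Holder for one completion attempt: HTTP status code, raw payload, and parsed response.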
    private static class CompletionCallResult {
        private final int statusCode;
        private final String payload;
        private final ChatCompletionResponse response;

        private CompletionCallResult(int statusCode, String payload, ChatCompletionResponse response) {
            this.statusCode = statusCode;
            this.payload = payload;
            this.response = response;
        }
    }

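    // Resolved routing target. Fields referenced above: baseUrl, apiKey, model, plus a tag() label.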
    private static class ResolvedRoute {