From a4f07b2a0ddb6c210e05afbbb491feeb466203e7 Mon Sep 17 00:00:00 2001
From: Junjie <fallin.jie@qq.com>
Date: 星期一, 09 三月 2026 19:15:50 +0800
Subject: [PATCH] #V3重大更新,升级JDK17,升级SpringBoot3.5.1
---
src/main/java/com/zy/ai/service/LlmChatService.java | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 234 insertions(+), 5 deletions(-)
diff --git a/src/main/java/com/zy/ai/service/LlmChatService.java b/src/main/java/com/zy/ai/service/LlmChatService.java
index 3e25561..c92ede3 100644
--- a/src/main/java/com/zy/ai/service/LlmChatService.java
+++ b/src/main/java/com/zy/ai/service/LlmChatService.java
@@ -9,16 +9,23 @@
import com.zy.ai.entity.LlmRouteConfig;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
+import org.springframework.web.client.RestClientResponseException;
import org.springframework.web.reactive.function.client.WebClient;
+import org.springframework.web.reactive.function.client.WebClientResponseException;
import reactor.core.publisher.Flux;
import java.util.ArrayList;
import java.util.Date;
+import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
import java.util.UUID;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
@@ -257,8 +264,15 @@
drain.setDaemon(true);
drain.start();
- streamFlux(route, routeReq).subscribe(payload -> {
+ boolean springAiStreaming = canUseSpringAi(routeReq);
+ Flux<String> streamSource = springAiStreaming ? streamFluxWithSpringAi(route, routeReq) : streamFlux(route, routeReq);
+ streamSource.subscribe(payload -> {
if (payload == null || payload.isEmpty()) return;
+ if (springAiStreaming) {
+ queue.offer(payload);
+ appendLimited(outputBuffer, payload);
+ return;
+ }
String[] events = payload.split("\\r?\\n\\r?\\n");
for (String part : events) {
String s = part;
@@ -350,7 +364,26 @@
                .doOnError(ex -> log.error("调用 LLM 流式失败, route={}", route.tag(), ex));
}
+ private Flux<String> streamFluxWithSpringAi(ResolvedRoute route, ChatCompletionRequest req) {
+ OpenAiApi api = buildOpenAiApi(route);
+ OpenAiApi.ChatCompletionRequest springReq = buildSpringAiRequest(route, req, true);
+ return api.chatCompletionStream(springReq)
+ .flatMapIterable(chunk -> chunk == null || chunk.choices() == null ? List.<OpenAiApi.ChatCompletionChunk.ChunkChoice>of() : chunk.choices())
+ .map(OpenAiApi.ChatCompletionChunk.ChunkChoice::delta)
+ .filter(Objects::nonNull)
+ .map(this::extractSpringAiContent)
+ .filter(text -> text != null && !text.isEmpty())
+                .doOnError(ex -> log.error("调用 Spring AI 流式失败, route={}", route.tag(), ex));
+ }
+
private CompletionCallResult callCompletion(ResolvedRoute route, ChatCompletionRequest req) {
+ if (canUseSpringAi(req)) {
+ return callCompletionWithSpringAi(route, req);
+ }
+ return callCompletionWithWebClient(route, req);
+ }
+
+ private CompletionCallResult callCompletionWithWebClient(ResolvedRoute route, ChatCompletionRequest req) {
WebClient client = WebClient.builder().baseUrl(route.baseUrl).build();
RawCompletionResult raw = client.post()
.uri("/chat/completions")
@@ -370,6 +403,16 @@
throw new LlmRouteException(raw.statusCode, raw.payload);
}
return new CompletionCallResult(raw.statusCode, raw.payload, parseCompletion(raw.payload));
+ }
+
+ private CompletionCallResult callCompletionWithSpringAi(ResolvedRoute route, ChatCompletionRequest req) {
+ OpenAiApi api = buildOpenAiApi(route);
+ OpenAiApi.ChatCompletionRequest springReq = buildSpringAiRequest(route, req, false);
+ ResponseEntity<OpenAiApi.ChatCompletion> entity = api.chatCompletionEntity(springReq);
+ OpenAiApi.ChatCompletion body = entity.getBody();
+ return new CompletionCallResult(entity.getStatusCode().value(),
+ body == null ? null : JSON.toJSONString(body),
+ toLegacyResponse(body));
}
private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) {
@@ -416,6 +459,10 @@
return quota ? route.switchOnQuota : route.switchOnError;
}
+ private boolean canUseSpringAi(ChatCompletionRequest req) {
+ return req != null && (req.getTools() == null || req.getTools().isEmpty());
+ }
+
private void markSuccess(ResolvedRoute route) {
if (route.id != null) {
llmRoutingService.markSuccess(route.id);
@@ -438,14 +485,32 @@
}
return "status=" + e.statusCode + ", body=" + body;
}
+ if (ex instanceof RestClientResponseException) {
+ RestClientResponseException e = (RestClientResponseException) ex;
+ String body = e.getResponseBodyAsString();
+ if (body != null && body.length() > 240) {
+ body = body.substring(0, 240);
+ }
+ return "status=" + e.getStatusCode().value() + ", body=" + body;
+ }
+ if (ex instanceof WebClientResponseException) {
+ WebClientResponseException e = (WebClientResponseException) ex;
+ String body = e.getResponseBodyAsString();
+ if (body != null && body.length() > 240) {
+ body = body.substring(0, 240);
+ }
+ return "status=" + e.getStatusCode().value() + ", body=" + body;
+ }
return ex.getMessage() == null ? ex.toString() : ex.getMessage();
}
private boolean isQuotaExhausted(Throwable ex) {
- if (!(ex instanceof LlmRouteException)) return false;
- LlmRouteException e = (LlmRouteException) ex;
- if (e.statusCode == 429) return true;
- String text = (e.body == null ? "" : e.body).toLowerCase();
+ Integer status = statusCodeOf(ex);
+ if (status != null && status == 429) {
+ return true;
+ }
+ String text = responseBodyOf(ex);
+ text = text == null ? "" : text.toLowerCase(Locale.ROOT);
return text.contains("insufficient_quota")
|| text.contains("quota")
                || text.contains("余额")
@@ -591,12 +656,24 @@
if (ex instanceof LlmRouteException) {
return ((LlmRouteException) ex).statusCode;
}
+ if (ex instanceof RestClientResponseException) {
+ return ((RestClientResponseException) ex).getStatusCode().value();
+ }
+ if (ex instanceof WebClientResponseException) {
+ return ((WebClientResponseException) ex).getStatusCode().value();
+ }
return null;
}
private String responseBodyOf(Throwable ex) {
if (ex instanceof LlmRouteException) {
return cut(((LlmRouteException) ex).body, LOG_TEXT_LIMIT);
+ }
+ if (ex instanceof RestClientResponseException) {
+ return cut(((RestClientResponseException) ex).getResponseBodyAsString(), LOG_TEXT_LIMIT);
+ }
+ if (ex instanceof WebClientResponseException) {
+ return cut(((WebClientResponseException) ex).getResponseBodyAsString(), LOG_TEXT_LIMIT);
}
return null;
}
@@ -619,6 +696,158 @@
return ex == null ? null : ex.getClass().getSimpleName();
}
+ private OpenAiApi buildOpenAiApi(ResolvedRoute route) {
+ return OpenAiApi.builder()
+ .baseUrl(route.baseUrl)
+ .apiKey(route.apiKey)
+ .build();
+ }
+
+ private OpenAiApi.ChatCompletionRequest buildSpringAiRequest(ResolvedRoute route,
+ ChatCompletionRequest req,
+ boolean stream) {
+ HashMap<String, Object> extraBody = new HashMap<>();
+ if (route.thinkingEnabled || req.getThinking() != null) {
+ HashMap<String, Object> thinking = new HashMap<>();
+ thinking.put("type", req.getThinking() != null && req.getThinking().getType() != null
+ ? req.getThinking().getType()
+ : "enable");
+ extraBody.put("thinking", thinking);
+ }
+ return new OpenAiApi.ChatCompletionRequest(
+ toSpringAiMessages(req.getMessages()),
+ route.model,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ req.getMax_tokens(),
+ null,
+ 1,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ stream,
+ stream ? OpenAiApi.ChatCompletionRequest.StreamOptions.INCLUDE_USAGE : null,
+ req.getTemperature(),
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ extraBody.isEmpty() ? null : extraBody
+ );
+ }
+
+ private List<OpenAiApi.ChatCompletionMessage> toSpringAiMessages(List<ChatCompletionRequest.Message> messages) {
+ ArrayList<OpenAiApi.ChatCompletionMessage> result = new ArrayList<>();
+ if (messages == null) {
+ return result;
+ }
+ for (ChatCompletionRequest.Message message : messages) {
+ if (message == null) {
+ continue;
+ }
+ result.add(new OpenAiApi.ChatCompletionMessage(
+ message.getContent(),
+ toSpringAiRole(message.getRole())
+ ));
+ }
+ return result;
+ }
+
+ private OpenAiApi.ChatCompletionMessage.Role toSpringAiRole(String role) {
+ if (role == null) {
+ return OpenAiApi.ChatCompletionMessage.Role.USER;
+ }
+ switch (role.trim().toLowerCase(Locale.ROOT)) {
+ case "system":
+ return OpenAiApi.ChatCompletionMessage.Role.SYSTEM;
+ case "assistant":
+ return OpenAiApi.ChatCompletionMessage.Role.ASSISTANT;
+ case "tool":
+ return OpenAiApi.ChatCompletionMessage.Role.TOOL;
+ default:
+ return OpenAiApi.ChatCompletionMessage.Role.USER;
+ }
+ }
+
+ private ChatCompletionResponse toLegacyResponse(OpenAiApi.ChatCompletion completion) {
+ if (completion == null) {
+ return null;
+ }
+ ChatCompletionResponse response = new ChatCompletionResponse();
+ response.setId(completion.id());
+ response.setCreated(completion.created());
+ response.setObjectName(completion.object());
+ if (completion.usage() != null) {
+ ChatCompletionResponse.Usage usage = new ChatCompletionResponse.Usage();
+ usage.setPromptTokens(completion.usage().promptTokens());
+ usage.setCompletionTokens(completion.usage().completionTokens());
+ usage.setTotalTokens(completion.usage().totalTokens());
+ response.setUsage(usage);
+ }
+ if (completion.choices() != null) {
+ ArrayList<ChatCompletionResponse.Choice> choices = new ArrayList<>();
+ for (OpenAiApi.ChatCompletion.Choice choice : completion.choices()) {
+ ChatCompletionResponse.Choice item = new ChatCompletionResponse.Choice();
+ item.setIndex(choice.index());
+ if (choice.finishReason() != null) {
+ item.setFinishReason(choice.finishReason().name().toLowerCase(Locale.ROOT));
+ }
+ item.setMessage(toLegacyMessage(choice.message()));
+ choices.add(item);
+ }
+ response.setChoices(choices);
+ }
+ return response;
+ }
+
+ private ChatCompletionRequest.Message toLegacyMessage(OpenAiApi.ChatCompletionMessage message) {
+ if (message == null) {
+ return null;
+ }
+ ChatCompletionRequest.Message result = new ChatCompletionRequest.Message();
+ result.setContent(extractSpringAiContent(message));
+ if (message.role() != null) {
+ result.setRole(message.role().name().toLowerCase(Locale.ROOT));
+ }
+ result.setName(message.name());
+ result.setTool_call_id(message.toolCallId());
+ return result;
+ }
+
+ private String extractSpringAiContent(OpenAiApi.ChatCompletionMessage message) {
+ if (message == null || message.rawContent() == null) {
+ return null;
+ }
+ Object content = message.rawContent();
+ if (content instanceof String) {
+ return (String) content;
+ }
+ if (content instanceof List) {
+ try {
+ @SuppressWarnings("unchecked")
+ List<OpenAiApi.ChatCompletionMessage.MediaContent> media =
+ (List<OpenAiApi.ChatCompletionMessage.MediaContent>) content;
+ return OpenAiApi.getTextContent(media);
+ } catch (ClassCastException ignore) {
+ }
+ }
+ return String.valueOf(content);
+ }
+
private String cut(String text, int maxLen) {
if (text == null) return null;
String clean = text.replace("\r", " ");
--
Gitblit v1.9.1