package com.zy.ai.service; import com.zy.ai.entity.ChatCompletionRequest; import com.zy.ai.entity.ChatCompletionResponse; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; import org.springframework.stereotype.Service; import org.springframework.web.reactive.function.client.WebClient; import reactor.core.publisher.Mono; import reactor.core.publisher.Flux; import java.util.List; import java.util.function.Consumer; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; @Slf4j @Service @RequiredArgsConstructor public class LlmChatService { private final WebClient llmWebClient; @Value("${llm.api-key}") private String apiKey; @Value("${llm.model}") private String model; /** * 通用对话方法:传入 messages,返回大模型文本回复 */ public String chat(List messages, Double temperature, Integer maxTokens) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? maxTokens : 1024); ChatCompletionResponse response = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .bodyValue(req) // 2.5.14 已支持 bodyValue .retrieve() .bodyToMono(ChatCompletionResponse.class) .doOnError(ex -> log.error("调用 LLM 失败", ex)) .onErrorResume(ex -> Mono.empty()) .block(); if (response == null || response.getChoices() == null || response.getChoices().isEmpty() || response.getChoices().get(0).getMessage() == null) { return "AI 诊断失败:未获取到有效回复。"; } return response.getChoices().get(0).getMessage().getContent(); } public void chatStream(List messages, Double temperature, Integer maxTokens, Consumer onChunk, Runnable onComplete, Consumer onError) { ChatCompletionRequest req = new ChatCompletionRequest(); req.setModel(model); req.setMessages(messages); req.setTemperature(temperature != null ? temperature : 0.3); req.setMax_tokens(maxTokens != null ? maxTokens : 1024); req.setStream(true); Flux flux = llmWebClient.post() .uri("/chat/completions") .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.TEXT_EVENT_STREAM) .bodyValue(req) .retrieve() .bodyToFlux(String.class) .doOnError(ex -> log.error("调用 LLM 流式失败", ex)); flux.subscribe(payload -> { String s = payload; if (s == null || s.isEmpty()) return; if (s.startsWith("data:")) { s = s.substring(5); if (s.startsWith(" ")) s = s.substring(1); } // 保留模型输出中的换行,只在判断结束标记时忽略空白 if ("[DONE]".equals(s.trim())) return; try { JSONObject obj = JSON.parseObject(s); JSONArray choices = obj.getJSONArray("choices"); if (choices != null && !choices.isEmpty()) { JSONObject c0 = choices.getJSONObject(0); JSONObject delta = c0.getJSONObject("delta"); if (delta != null) { String content = delta.getString("content"); // log.info("chunk = [{}] len = {}", content, content.length()); // for (char ch : content.toCharArray()) { // log.info("char: {} ({})", (int) ch, ch == '\n' ? "\\n" : ch); // } if (content != null) onChunk.accept(content); } } } catch (Exception ignore) {} }, err -> { if (onError != null) onError.accept(err); }, () -> { if (onComplete != null) onComplete.run(); }); } }