package com.zy.ai.service;
 
import com.zy.ai.entity.ChatCompletionRequest;
import com.zy.ai.entity.ChatCompletionResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Mono;
import reactor.core.publisher.Flux;
 
import java.util.List;
import java.util.function.Consumer;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
 
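/**
 * Service that calls an OpenAI-compatible {@code /chat/completions} endpoint through the
 * injected {@link WebClient}, either as a single blocking request ({@link #chat}) or as a
 * Server-Sent Events stream ({@link #chatStream}).
 *
 * <p>Illustrative usage only (caller-side variables such as {@code messages} are assumed):
 * <pre>{@code
 * String reply = llmChatService.chat(messages, 0.3, 1024);
 *
 * StringBuilder buf = new StringBuilder();
 * llmChatService.chatStream(messages, null, null,
 *         buf::append,                       // append each streamed chunk
 *         () -> System.out.println(buf),     // called when the stream completes
 *         Throwable::printStackTrace);       // called on error
 * }</pre>
 */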
@Slf4j
@Service
@RequiredArgsConstructor
public class LlmChatService {
 
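    // WebClient assumed to be configured elsewhere (base URL, timeouts) and exposed as the "llmWebClient" bean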
    private final WebClient llmWebClient;
 
    @Value("${llm.api-key}")
    private String apiKey;
 
    @Value("${llm.model}")
    private String model;
 
    /**
     * General-purpose chat call: sends the given messages and returns the model's text reply.
     * Null temperature / maxTokens fall back to 0.3 / 1024.
     */
    public String chat(List<ChatCompletionRequest.Message> messages,
                       Double temperature,
                       Integer maxTokens) {
 
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(model);
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
 
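        // Blocking call: acceptable on a servlet/MVC thread (assumed caller context), but must not run inside a reactive chain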
        ChatCompletionResponse response = llmWebClient.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .bodyValue(req)   // bodyValue is supported as of 2.5.14
                .retrieve()
                .bodyToMono(ChatCompletionResponse.class)
                .doOnError(ex -> log.error("Failed to call LLM", ex))
                .onErrorResume(ex -> Mono.empty())
                .block();
 
        if (response == null ||
                response.getChoices() == null ||
                response.getChoices().isEmpty() ||
                response.getChoices().get(0).getMessage() == null) {
 
            return "AI diagnosis failed: no valid reply was returned.";
        }
 
        return response.getChoices().get(0).getMessage().getContent();
    }
 
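    /**
     * Streaming variant of {@link #chat}: the response is consumed as Server-Sent Events and
     * each piece of generated text is pushed to {@code onChunk} as soon as it arrives.
     *
     * @param onChunk    receives each incremental content fragment
     * @param onComplete invoked when the stream ends normally (nullable)
     * @param onError    invoked if the stream fails (nullable)
     */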
    public void chatStream(List<ChatCompletionRequest.Message> messages,
                           Double temperature,
                           Integer maxTokens,
                           Consumer<String> onChunk,
                           Runnable onComplete,
                           Consumer<Throwable> onError) {
 
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(model);
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
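        // stream=true asks the API to return the reply as incremental SSE delta chunks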
        req.setStream(true);
 
        Flux<String> flux = llmWebClient.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .retrieve()
                .bodyToFlux(String.class)
                .doOnError(ex -> log.error("Failed to call LLM (streaming)", ex));
 
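        // Each SSE payload is expected to look like:  data: {"choices":[{"delta":{"content":"..."}}]}
        // and the stream terminates with a literal "data: [DONE]" marker.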
        flux.subscribe(payload -> {
            String s = payload;
            if (s == null || s.isEmpty()) return;
            if (s.startsWith("data:")) {
                s = s.substring(5);
                if (s.startsWith(" ")) s = s.substring(1);
            }
            // Preserve newlines from the model output; only trim whitespace when checking for the end marker
            if ("[DONE]".equals(s.trim())) return;
            try {
                JSONObject obj = JSON.parseObject(s);
                JSONArray choices = obj.getJSONArray("choices");
                if (choices != null && !choices.isEmpty()) {
                    JSONObject c0 = choices.getJSONObject(0);
                    JSONObject delta = c0.getJSONObject("delta");
                    if (delta != null) {
                        String content = delta.getString("content");
                        if (content != null) onChunk.accept(content);
                    }
                }
            } catch (Exception ignore) {
                // Non-JSON or malformed lines (e.g. SSE keep-alives) are skipped without breaking the stream
            }
        }, err -> {
            if (onError != null) onError.accept(err);
        }, () -> {
            if (onComplete != null) onComplete.run();
        });
    }
}