#
Junjie
1 天以前 6d29b4cb573525c1092a67ef37aacf7ef2233723
src/main/java/com/zy/ai/service/LlmChatService.java
@@ -1,7 +1,11 @@
package com.zy.ai.service;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zy.ai.entity.ChatCompletionRequest;
import com.zy.ai.entity.ChatCompletionResponse;
import com.zy.ai.entity.LlmRouteConfig;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
@@ -9,37 +13,33 @@
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Mono;
import reactor.core.publisher.Flux;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import java.util.function.Consumer;
@Slf4j
@Service
@RequiredArgsConstructor
public class LlmChatService {
    private final WebClient llmWebClient;
    private final LlmRoutingService llmRoutingService;
    @Value("${llm.api-key}")
    private String apiKey;
    @Value("${llm.base-url:}")
    private String fallbackBaseUrl;
    @Value("${llm.model}")
    private String model;
    @Value("${llm.api-key:}")
    private String fallbackApiKey;
    @Value("${llm.pythonPlatformUrl}")
    private String pythonPlatformUrl;
    @Value("${llm.model:}")
    private String fallbackModel;
    @Value("${llm.thinking}")
    private String thinking;
    @Value("${llm.thinking:false}")
    private String fallbackThinking;
    /**
     * 通用对话方法:传入 messages,返回大模型文本回复
@@ -49,27 +49,12 @@
                       Integer maxTokens) {
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(model);
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
        req.setStream(false);
        ChatCompletionResponse response = llmWebClient.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .exchangeToMono(resp -> resp.bodyToFlux(String.class)
                        .collectList()
                        .map(list -> {
                            String payload = String.join("\n\n", list);
                            return parseCompletion(payload);
                        }))
                .doOnError(ex -> log.error("调用 LLM 失败", ex))
                .onErrorResume(ex -> Mono.empty())
                .block();
        ChatCompletionResponse response = complete(req);
        if (response == null ||
                response.getChoices() == null ||
@@ -88,17 +73,10 @@
                                                 Integer maxTokens,
                                                 List<Object> tools) {
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(model);
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
        req.setStream(false);
        if(thinking.equals("enable")) {
            ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking();
            thinking.setType("enable");
            req.setThinking(thinking);
        }
        if (tools != null && !tools.isEmpty()) {
            req.setTools(tools);
            req.setTool_choice("auto");
@@ -107,26 +85,42 @@
    }
    public ChatCompletionResponse complete(ChatCompletionRequest req) {
        try {
            return llmWebClient.post()
                    .uri("/chat/completions")
                    .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
                    .contentType(MediaType.APPLICATION_JSON)
                    .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
                    .bodyValue(req)
                    .exchangeToMono(resp -> resp.bodyToFlux(String.class)
                            .collectList()
                            .map(list -> {
                                String payload = String.join("\n\n", list);
                                return parseCompletion(payload);
                            }))
                    .doOnError(ex -> log.error("调用 LLM 失败", ex))
                    .onErrorResume(ex -> Mono.empty())
                    .block();
        } catch (Exception e) {
            log.error("调用 LLM 失败", e);
        List<ResolvedRoute> routes = resolveRoutes();
        if (routes.isEmpty()) {
            log.error("调用 LLM 失败: 未配置可用 LLM 路由");
            return null;
        }
        Throwable last = null;
        for (int i = 0; i < routes.size(); i++) {
            ResolvedRoute route = routes.get(i);
            boolean hasNext = i < routes.size() - 1;
            try {
                ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, false);
                ChatCompletionResponse resp = callCompletion(route, routeReq);
                if (!isValidCompletion(resp)) {
                    throw new RuntimeException("LLM 响应为空");
                }
                markSuccess(route);
                return resp;
            } catch (Throwable ex) {
                last = ex;
                boolean quota = isQuotaExhausted(ex);
                boolean canSwitch = shouldSwitch(route, quota);
                markFailure(route, ex, canSwitch);
                if (hasNext && canSwitch) {
                    log.warn("LLM 切换到下一路由, current={}, reason={}", route.tag(), errorText(ex));
                    continue;
                }
                log.error("调用 LLM 失败, route={}", route.tag(), ex);
                break;
            }
        }
        if (last != null) {
            log.error("调用 LLM 全部路由失败: {}", errorText(last));
        }
        return null;
    }
    public void chatStream(List<ChatCompletionRequest.Message> messages,
@@ -137,92 +131,12 @@
                           Consumer<Throwable> onError) {
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(model);
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
        req.setStream(true);
        Flux<String> flux = llmWebClient.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .retrieve()
                .bodyToFlux(String.class)
                .doOnError(ex -> log.error("调用 LLM 流式失败", ex));
        AtomicBoolean doneSeen = new AtomicBoolean(false);
        AtomicBoolean errorSeen = new AtomicBoolean(false);
        LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
        Thread drain = new Thread(() -> {
            try {
                while (true) {
                    String s = queue.poll(2, TimeUnit.SECONDS);
                    if (s != null) {
                        try { onChunk.accept(s); } catch (Exception ignore) {}
                    }
                    if (doneSeen.get() && queue.isEmpty()) {
                        if (!errorSeen.get()) {
                            try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {}
                        }
                        break;
                    }
                }
            } catch (InterruptedException ignore) {
                ignore.printStackTrace();
            }
        });
        drain.setDaemon(true);
        drain.start();
        flux.subscribe(payload -> {
            if (payload == null || payload.isEmpty()) return;
            String[] events = payload.split("\\r?\\n\\r?\\n");
            for (String part : events) {
                String s = part;
                if (s == null || s.isEmpty()) continue;
                if (s.startsWith("data:")) {
                    s = s.substring(5);
                    if (s.startsWith(" ")) s = s.substring(1);
                }
                if ("[DONE]".equals(s.trim())) {
                    doneSeen.set(true);
                    continue;
                }
                try {
                    JSONObject obj = JSON.parseObject(s);
                    JSONArray choices = obj.getJSONArray("choices");
                    if (choices != null && !choices.isEmpty()) {
                        JSONObject c0 = choices.getJSONObject(0);
                        JSONObject delta = c0.getJSONObject("delta");
                        if (delta != null) {
                            String content = delta.getString("content");
                            if (content != null) {
                                try { queue.offer(content); } catch (Exception ignore) {}
                            }
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }, err -> {
            errorSeen.set(true);
            doneSeen.set(true);
            if (onError != null) onError.accept(err);
        }, () -> {
            if (!doneSeen.get()) {
                errorSeen.set(true);
                doneSeen.set(true);
                if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成"));
            } else {
                doneSeen.set(true);
            }
        });
        streamWithFailover(req, onChunk, onComplete, onError);
    }
    public void chatStreamWithTools(List<ChatCompletionRequest.Message> messages,
@@ -233,120 +147,46 @@
                                    Runnable onComplete,
                                    Consumer<Throwable> onError) {
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(model);
        req.setMessages(messages);
        req.setTemperature(temperature != null ? temperature : 0.3);
        req.setMax_tokens(maxTokens != null ? maxTokens : 1024);
        req.setStream(true);
        if(thinking.equals("enable")) {
            ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking();
            thinking.setType("enable");
            req.setThinking(thinking);
        }
        if (tools != null && !tools.isEmpty()) {
            req.setTools(tools);
            req.setTool_choice("auto");
        }
        Flux<String> flux = llmWebClient.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .retrieve()
                .bodyToFlux(String.class)
                .doOnError(ex -> log.error("调用 LLM 流式失败", ex));
        AtomicBoolean doneSeen = new AtomicBoolean(false);
        AtomicBoolean errorSeen = new AtomicBoolean(false);
        LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
        Thread drain = new Thread(() -> {
            try {
                while (true) {
                    String s = queue.poll(5, TimeUnit.SECONDS);
                    if (s != null) {
                        try { onChunk.accept(s); } catch (Exception ignore) {}
                    }
                    if (doneSeen.get() && queue.isEmpty()) {
                        if (!errorSeen.get()) {
                            try { if (onComplete != null) onComplete.run(); } catch (Exception ignore) {}
                        }
                        break;
                    }
                }
            } catch (InterruptedException ignore) {
                ignore.printStackTrace();
            }
        });
        drain.setDaemon(true);
        drain.start();
        flux.subscribe(payload -> {
            if (payload == null || payload.isEmpty()) return;
            String[] events = payload.split("\\r?\\n\\r?\\n");
            for (String part : events) {
                String s = part;
                if (s == null || s.isEmpty()) continue;
                if (s.startsWith("data:")) {
                    s = s.substring(5);
                    if (s.startsWith(" ")) s = s.substring(1);
                }
                if ("[DONE]".equals(s.trim())) {
                    doneSeen.set(true);
                    continue;
                }
                try {
                    JSONObject obj = JSON.parseObject(s);
                    JSONArray choices = obj.getJSONArray("choices");
                    if (choices != null && !choices.isEmpty()) {
                        JSONObject c0 = choices.getJSONObject(0);
                        JSONObject delta = c0.getJSONObject("delta");
                        if (delta != null) {
                            String content = delta.getString("content");
                            if (content != null) {
                                try { queue.offer(content); } catch (Exception ignore) {}
                            }
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }, err -> {
            errorSeen.set(true);
            doneSeen.set(true);
            if (onError != null) onError.accept(err);
        }, () -> {
            if (!doneSeen.get()) {
                errorSeen.set(true);
                doneSeen.set(true);
                if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成"));
            } else {
                doneSeen.set(true);
            }
        });
        streamWithFailover(req, onChunk, onComplete, onError);
    }
    public void chatStreamRunPython(String prompt, String chatId, Consumer<String> onChunk,
    private void streamWithFailover(ChatCompletionRequest req,
                                    Consumer<String> onChunk,
                                    Runnable onComplete,
                                    Consumer<Throwable> onError) {
        HashMap<String, Object> req = new HashMap<>();
        req.put("prompt", prompt);
        req.put("chatId", chatId);
        List<ResolvedRoute> routes = resolveRoutes();
        if (routes.isEmpty()) {
            if (onError != null) onError.accept(new RuntimeException("未配置可用 LLM 路由"));
            return;
        }
        attemptStream(routes, 0, req, onChunk, onComplete, onError);
    }
        Flux<String> flux = llmWebClient.post()
                .uri(pythonPlatformUrl)
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .retrieve()
                .bodyToFlux(String.class)
                .doOnError(ex -> log.error("调用 LLM 流式失败", ex));
    private void attemptStream(List<ResolvedRoute> routes,
                               int index,
                               ChatCompletionRequest req,
                               Consumer<String> onChunk,
                               Runnable onComplete,
                               Consumer<Throwable> onError) {
        if (index >= routes.size()) {
            if (onError != null) onError.accept(new RuntimeException("LLM 路由全部失败"));
            return;
        }
        ResolvedRoute route = routes.get(index);
        ChatCompletionRequest routeReq = applyRoute(cloneRequest(req), route, true);
        AtomicBoolean doneSeen = new AtomicBoolean(false);
        AtomicBoolean errorSeen = new AtomicBoolean(false);
        AtomicBoolean emitted = new AtomicBoolean(false);
        LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
        Thread drain = new Thread(() -> {
@@ -354,6 +194,7 @@
                while (true) {
                    String s = queue.poll(2, TimeUnit.SECONDS);
                    if (s != null) {
                        emitted.set(true);
                        try {
                            onChunk.accept(s);
                        } catch (Exception ignore) {
@@ -370,13 +211,12 @@
                    }
                }
            } catch (InterruptedException ignore) {
                ignore.printStackTrace();
            }
        });
        drain.setDaemon(true);
        drain.start();
        flux.subscribe(payload -> {
        streamFlux(route, routeReq).subscribe(payload -> {
            if (payload == null || payload.isEmpty()) return;
            String[] events = payload.split("\\r?\\n\\r?\\n");
            for (String part : events) {
@@ -390,10 +230,6 @@
                    doneSeen.set(true);
                    continue;
                }
                if("<think>".equals(s.trim()) || "</think>".equals(s.trim())) {
                    queue.offer(s.trim());
                    continue;
                }
                try {
                    JSONObject obj = JSON.parseObject(s);
                    JSONArray choices = obj.getJSONArray("choices");
@@ -403,30 +239,190 @@
                        if (delta != null) {
                            String content = delta.getString("content");
                            if (content != null) {
                                try {
                                    queue.offer(content);
                                } catch (Exception ignore) {
                                }
                                queue.offer(content);
                            }
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    log.warn("解析 LLM stream 片段失败: {}", e.getMessage());
                }
            }
        }, err -> {
            errorSeen.set(true);
            doneSeen.set(true);
            boolean quota = isQuotaExhausted(err);
            boolean canSwitch = shouldSwitch(route, quota);
            markFailure(route, err, canSwitch);
            if (!emitted.get() && canSwitch && index < routes.size() - 1) {
                log.warn("LLM 路由失败,自动切换,current={}, reason={}", route.tag(), errorText(err));
                attemptStream(routes, index + 1, req, onChunk, onComplete, onError);
                return;
            }
            if (onError != null) onError.accept(err);
        }, () -> {
            if (!doneSeen.get()) {
                RuntimeException ex = new RuntimeException("LLM 流意外完成");
                errorSeen.set(true);
                doneSeen.set(true);
                if (onError != null) onError.accept(new RuntimeException("LLM 流意外完成"));
                boolean canSwitch = shouldSwitch(route, false);
                markFailure(route, ex, canSwitch);
                if (!emitted.get() && canSwitch && index < routes.size() - 1) {
                    log.warn("LLM 路由流异常完成,自动切换,current={}", route.tag());
                    attemptStream(routes, index + 1, req, onChunk, onComplete, onError);
                } else {
                    if (onError != null) onError.accept(ex);
                }
            } else {
                markSuccess(route);
                doneSeen.set(true);
            }
        });
    }
    private Flux<String> streamFlux(ResolvedRoute route, ChatCompletionRequest req) {
        WebClient client = WebClient.builder().baseUrl(route.baseUrl).build();
        return client.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .exchangeToFlux(resp -> {
                    int status = resp.rawStatusCode();
                    if (status >= 200 && status < 300) {
                        return resp.bodyToFlux(String.class);
                    }
                    return resp.bodyToMono(String.class)
                            .defaultIfEmpty("")
                            .flatMapMany(body -> Flux.error(new LlmRouteException(status, body)));
                })
                .doOnError(ex -> log.error("调用 LLM 流式失败, route={}", route.tag(), ex));
    }
    private ChatCompletionResponse callCompletion(ResolvedRoute route, ChatCompletionRequest req) {
        WebClient client = WebClient.builder().baseUrl(route.baseUrl).build();
        RawCompletionResult raw = client.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + route.apiKey)
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .exchangeToMono(resp -> resp.bodyToFlux(String.class)
                        .collectList()
                        .map(list -> new RawCompletionResult(resp.rawStatusCode(), String.join("\\n\\n", list))))
                .block();
        if (raw == null) {
            throw new RuntimeException("LLM 返回为空");
        }
        if (raw.statusCode < 200 || raw.statusCode >= 300) {
            throw new LlmRouteException(raw.statusCode, raw.payload);
        }
        return parseCompletion(raw.payload);
    }
    private ChatCompletionRequest applyRoute(ChatCompletionRequest req, ResolvedRoute route, boolean stream) {
        req.setModel(route.model);
        req.setStream(stream);
        if (route.thinkingEnabled) {
            ChatCompletionRequest.Thinking t = new ChatCompletionRequest.Thinking();
            t.setType("enable");
            req.setThinking(t);
        } else {
            req.setThinking(null);
        }
        return req;
    }
    private ChatCompletionRequest cloneRequest(ChatCompletionRequest src) {
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(src.getModel());
        req.setMessages(src.getMessages());
        req.setTemperature(src.getTemperature());
        req.setMax_tokens(src.getMax_tokens());
        req.setStream(src.getStream());
        req.setTools(src.getTools());
        req.setTool_choice(src.getTool_choice());
        req.setThinking(src.getThinking());
        return req;
    }
    private boolean isValidCompletion(ChatCompletionResponse response) {
        if (response == null || response.getChoices() == null || response.getChoices().isEmpty()) {
            return false;
        }
        ChatCompletionRequest.Message message = response.getChoices().get(0).getMessage();
        if (message == null) {
            return false;
        }
        if (!isBlank(message.getContent())) {
            return true;
        }
        return message.getTool_calls() != null && !message.getTool_calls().isEmpty();
    }
    private boolean shouldSwitch(ResolvedRoute route, boolean quota) {
        return quota ? route.switchOnQuota : route.switchOnError;
    }
    private void markSuccess(ResolvedRoute route) {
        if (route.id != null) {
            llmRoutingService.markSuccess(route.id);
        }
    }
    private void markFailure(ResolvedRoute route, Throwable ex, boolean enterCooldown) {
        if (route.id != null) {
            llmRoutingService.markFailure(route.id, errorText(ex), enterCooldown, route.cooldownSeconds);
        }
    }
    private String errorText(Throwable ex) {
        if (ex == null) return "unknown";
        if (ex instanceof LlmRouteException) {
            LlmRouteException e = (LlmRouteException) ex;
            String body = e.body == null ? "" : e.body;
            if (body.length() > 240) {
                body = body.substring(0, 240);
            }
            return "status=" + e.statusCode + ", body=" + body;
        }
        return ex.getMessage() == null ? ex.toString() : ex.getMessage();
    }
    private boolean isQuotaExhausted(Throwable ex) {
        if (!(ex instanceof LlmRouteException)) return false;
        LlmRouteException e = (LlmRouteException) ex;
        if (e.statusCode == 429) return true;
        String text = (e.body == null ? "" : e.body).toLowerCase();
        return text.contains("insufficient_quota")
                || text.contains("quota")
                || text.contains("余额")
                || text.contains("用量")
                || text.contains("超限")
                || text.contains("rate limit");
    }
    private List<ResolvedRoute> resolveRoutes() {
        List<ResolvedRoute> routes = new ArrayList<>();
        List<LlmRouteConfig> dbRoutes = llmRoutingService.listAvailableRoutes();
        for (LlmRouteConfig c : dbRoutes) {
            routes.add(ResolvedRoute.fromDb(c));
        }
        // 兼容:数据库为空时,回退到 yml
        if (routes.isEmpty() && !isBlank(fallbackBaseUrl) && !isBlank(fallbackApiKey) && !isBlank(fallbackModel)) {
            routes.add(ResolvedRoute.fromFallback(fallbackBaseUrl, fallbackApiKey, fallbackModel, isFallbackThinkingEnabled()));
        }
        return routes;
    }
    private boolean isFallbackThinkingEnabled() {
        String x = fallbackThinking == null ? "" : fallbackThinking.trim().toLowerCase();
        return "true".equals(x) || "1".equals(x) || "enable".equals(x);
    }
    private boolean isBlank(String s) {
        return s == null || s.trim().isEmpty();
    }
    private ChatCompletionResponse mergeSseChunk(ChatCompletionResponse acc, String payload) {
@@ -452,7 +448,7 @@
                        ChatCompletionResponse.Choice choice = new ChatCompletionResponse.Choice();
                        ChatCompletionRequest.Message msg = new ChatCompletionRequest.Message();
                        choice.setMessage(msg);
                        java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>();
                        ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>();
                        list.add(choice);
                        acc.setChoices(list);
                    }
@@ -490,7 +486,8 @@
                if (created != null) acc.setCreated(created);
                String object = obj.getString("object");
                if (object != null && !object.isEmpty()) acc.setObjectName(object);
            } catch (Exception ignore) {}
            } catch (Exception ignore) {
            }
        }
        return acc;
    }
@@ -502,7 +499,8 @@
            if (r != null && r.getChoices() != null && !r.getChoices().isEmpty() && r.getChoices().get(0).getMessage() != null) {
                return r;
            }
        } catch (Exception ignore) {}
        } catch (Exception ignore) {
        }
        ChatCompletionResponse sse = mergeSseChunk(new ChatCompletionResponse(), payload);
        if (sse.getChoices() != null && !sse.getChoices().isEmpty() && sse.getChoices().get(0).getMessage() != null && sse.getChoices().get(0).getMessage().getContent() != null) {
            return sse;
@@ -513,9 +511,75 @@
        msg.setRole("assistant");
        msg.setContent(payload);
        choice.setMessage(msg);
        java.util.ArrayList<ChatCompletionResponse.Choice> list = new java.util.ArrayList<>();
        ArrayList<ChatCompletionResponse.Choice> list = new ArrayList<>();
        list.add(choice);
        r.setChoices(list);
        return r;
    }
    /** Immutable pair of HTTP status code and raw response payload from a completion call. */
    private static class RawCompletionResult {
        // HTTP status returned by the upstream LLM endpoint.
        private final int statusCode;
        // Raw response body (possibly SSE-framed); parsed later by parseCompletion.
        private final String payload;
        private RawCompletionResult(int statusCode, String payload) {
            this.statusCode = statusCode;
            this.payload = payload;
        }
    }
    /**
     * Signals a non-2xx HTTP response from an LLM route. Keeps the status code and
     * error body so isQuotaExhausted and errorText can inspect them.
     */
    private static class LlmRouteException extends RuntimeException {
        // HTTP status of the failed call (e.g. 429 for rate limiting).
        private final int statusCode;
        // Raw error body; may be empty when the upstream returned no content.
        private final String body;
        private LlmRouteException(int statusCode, String body) {
            super("http status=" + statusCode);
            this.statusCode = statusCode;
            this.body = body;
        }
    }
    /**
     * Internal view of one usable LLM endpoint, normalized from either a database row
     * (LlmRouteConfig) or the yml fallback properties.
     */
    private static class ResolvedRoute {
        // DB primary key; null for the yml fallback route (markSuccess/markFailure skip it).
        private Long id;
        // Display name used in log tags.
        private String name;
        private String baseUrl;
        private String apiKey;
        private String model;
        // When true, applyRoute attaches a Thinking config with type "enable".
        private boolean thinkingEnabled;
        // Whether failover is allowed on quota-style errors (defaults to true when unset in DB).
        private boolean switchOnQuota;
        // Whether failover is allowed on generic errors (defaults to true when unset in DB).
        private boolean switchOnError;
        // Cooldown passed to markFailure; may be null for DB routes without a value.
        private Integer cooldownSeconds;
        // Maps a DB row into a route; flag columns use 1 = enabled, null = default-on for switches.
        private static ResolvedRoute fromDb(LlmRouteConfig c) {
            ResolvedRoute r = new ResolvedRoute();
            r.id = c.getId();
            r.name = c.getName();
            r.baseUrl = c.getBaseUrl();
            r.apiKey = c.getApiKey();
            r.model = c.getModel();
            r.thinkingEnabled = c.getThinking() != null && c.getThinking() == 1;
            r.switchOnQuota = c.getSwitchOnQuota() == null || c.getSwitchOnQuota() == 1;
            r.switchOnError = c.getSwitchOnError() == null || c.getSwitchOnError() == 1;
            r.cooldownSeconds = c.getCooldownSeconds();
            return r;
        }
        // Builds the single yml-configured fallback route; switching always allowed, 5-min cooldown.
        private static ResolvedRoute fromFallback(String baseUrl, String apiKey, String model, boolean thinkingEnabled) {
            ResolvedRoute r = new ResolvedRoute();
            r.name = "fallback-yml";
            r.baseUrl = baseUrl;
            r.apiKey = apiKey;
            r.model = model;
            r.thinkingEnabled = thinkingEnabled;
            r.switchOnQuota = true;
            r.switchOnError = true;
            r.cooldownSeconds = 300;
            return r;
        }
        // Short human-readable identifier for log messages, e.g. "primary model=gpt-4o".
        private String tag() {
            String showName = name == null ? "unnamed" : name;
            String showModel = model == null ? "" : (" model=" + model);
            return showName + showModel;
        }
    }
}