#
Junjie
昨天 8636ff97bffec9f2130628bf09c9d0fbb371e2bc
src/main/java/com/zy/ai/service/LlmRoutingService.java
@@ -1,17 +1,14 @@
package com.zy.ai.service;
import com.baomidou.mybatisplus.mapper.EntityWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.zy.ai.entity.ChatCompletionRequest;
import com.zy.ai.entity.LlmRouteConfig;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Mono;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
@@ -26,9 +23,18 @@
    private static final long CACHE_TTL_MS = 3000L;
    private final LlmRouteConfigService llmRouteConfigService;
    private final LlmSpringAiClientService llmSpringAiClientService;
    private volatile List<LlmRouteConfig> allRouteCache = Collections.emptyList();
    private volatile long cacheExpireAt = 0L;
    private static final Comparator<LlmRouteConfig> ROUTE_ORDER = (a, b) -> {
        int pa = a == null || a.getPriority() == null ? Integer.MAX_VALUE : a.getPriority();
        int pb = b == null || b.getPriority() == null ? Integer.MAX_VALUE : b.getPriority();
        if (pa != pb) return Integer.compare(pa, pb);
        long ia = a == null || a.getId() == null ? Long.MAX_VALUE : a.getId();
        long ib = b == null || b.getId() == null ? Long.MAX_VALUE : b.getId();
        return Long.compare(ia, ib);
    };
    public void evictCache() {
        cacheExpireAt = 0L;
@@ -63,9 +69,11 @@
        }
        if (result.isEmpty() && !coolingRoutes.isEmpty()) {
            // 避免所有路由都处于冷却时系统完全不可用,降级允许使用冷却路由
            coolingRoutes.sort(ROUTE_ORDER);
            log.warn("LLM 路由均处于冷却,降级启用冷却路由。cooling={}, total={}", coolingRoutes.size(), total);
            return coolingRoutes;
        }
        result.sort(ROUTE_ORDER);
        if (result.isEmpty()) {
            log.warn("未找到可用 LLM 路由。total={}, disabled={}, invalid={}", total, disabled, invalid);
        }
@@ -75,7 +83,7 @@
    public void markSuccess(Long routeId) {
        if (routeId == null) return;
        try {
            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
            LlmRouteConfig db = llmRouteConfigService.getById(routeId);
            if (db == null) return;
            db.setSuccessCount(nvl(db.getSuccessCount()) + 1);
            db.setConsecutiveFailCount(0);
@@ -91,7 +99,7 @@
    public void markFailure(Long routeId, String errorText, boolean enterCooldown, Integer cooldownSeconds) {
        if (routeId == null) return;
        try {
            LlmRouteConfig db = llmRouteConfigService.selectById(routeId);
            LlmRouteConfig db = llmRouteConfigService.getById(routeId);
            if (db == null) return;
            Date now = new Date();
            db.setFailCount(nvl(db.getFailCount()) + 1);
@@ -144,10 +152,15 @@
            if (now < cacheExpireAt && allRouteCache != null) {
                return allRouteCache;
            }
            EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
            wrapper.orderBy("priority", true).orderBy("id", true);
            List<LlmRouteConfig> list = llmRouteConfigService.selectList(wrapper);
            allRouteCache = list == null ? Collections.emptyList() : list;
            QueryWrapper<LlmRouteConfig> wrapper = new QueryWrapper<>();
            wrapper.orderBy(true, true, "priority").orderBy(true, true, "id");
            List<LlmRouteConfig> list = llmRouteConfigService.list(wrapper);
            if (list == null) {
                allRouteCache = Collections.emptyList();
            } else {
                list.sort(ROUTE_ORDER);
                allRouteCache = list;
            }
            cacheExpireAt = System.currentTimeMillis() + CACHE_TTL_MS;
            return allRouteCache;
        }
@@ -226,31 +239,31 @@
    }
    private TestHttpResult testJavaRoute(LlmRouteConfig cfg) {
        HashMap<String, Object> req = new HashMap<>();
        req.put("model", cfg.getModel());
        List<Map<String, String>> messages = new ArrayList<>();
        HashMap<String, String> msg = new HashMap<>();
        msg.put("role", "user");
        msg.put("content", "ping");
        ChatCompletionRequest req = new ChatCompletionRequest();
        req.setModel(cfg.getModel());
        List<ChatCompletionRequest.Message> messages = new ArrayList<>();
        ChatCompletionRequest.Message msg = new ChatCompletionRequest.Message();
        msg.setRole("user");
        msg.setContent("ping");
        messages.add(msg);
        req.put("messages", messages);
        req.put("stream", false);
        req.put("max_tokens", 8);
        req.put("temperature", 0);
        WebClient client = WebClient.builder().baseUrl(cfg.getBaseUrl()).build();
        return client.post()
                .uri("/chat/completions")
                .header(HttpHeaders.AUTHORIZATION, "Bearer " + cfg.getApiKey())
                .contentType(MediaType.APPLICATION_JSON)
                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
                .bodyValue(req)
                .exchangeToMono(resp -> resp.bodyToMono(String.class)
                        .defaultIfEmpty("")
                        .map(body -> new TestHttpResult(resp.rawStatusCode(), body)))
                .timeout(Duration.ofSeconds(12))
                .onErrorResume(ex -> Mono.just(new TestHttpResult(-1, safe(ex.getMessage()))))
                .block();
        req.setMessages(messages);
        req.setStream(false);
        req.setMax_tokens(8);
        req.setTemperature(0D);
        if (cfg.getThinking() != null && cfg.getThinking() == 1) {
            ChatCompletionRequest.Thinking thinking = new ChatCompletionRequest.Thinking();
            thinking.setType("enable");
            req.setThinking(thinking);
        }
        try {
            LlmSpringAiClientService.CompletionCallResult result =
                    llmSpringAiClientService.callCompletion(cfg.getBaseUrl(), cfg.getApiKey(), req);
            return new TestHttpResult(result.getStatusCode(), result.getPayload());
        } catch (Throwable ex) {
            Integer statusCode = llmSpringAiClientService.statusCodeOf(ex);
            String body = llmSpringAiClientService.responseBodyOf(ex, 300);
            return new TestHttpResult(statusCode == null ? -1 : statusCode, safe(body != null ? body : ex.getMessage()));
        }
    }
    private String trimBody(String body) {