From 5e492e5d5a2b743e2e99443220d343f72a633f6d Mon Sep 17 00:00:00 2001
From: Junjie <fallin.jie@qq.com>
Date: Tue, 03 Mar 2026 16:57:52 +0800
Subject: [PATCH] #
---
src/main/java/com/zy/ai/service/LlmRoutingService.java | 389 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 389 insertions(+), 0 deletions(-)
diff --git a/src/main/java/com/zy/ai/service/LlmRoutingService.java b/src/main/java/com/zy/ai/service/LlmRoutingService.java
new file mode 100644
index 0000000..96c4805
--- /dev/null
+++ b/src/main/java/com/zy/ai/service/LlmRoutingService.java
@@ -0,0 +1,389 @@
+package com.zy.ai.service;
+
+import com.baomidou.mybatisplus.mapper.EntityWrapper;
+import com.zy.ai.entity.LlmRouteConfig;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.MediaType;
+import org.springframework.stereotype.Service;
+import org.springframework.web.reactive.function.client.WebClient;
+import reactor.core.publisher.Mono;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Picks usable LLM routes and records how each route is performing.
+ *
+ * <p>Route rows are read through {@code LlmRouteConfigService} and served from a short-lived
+ * in-memory cache. Call outcomes are written back to the route row; a failing route can be put
+ * into a cooldown window during which {@link #listAvailableRoutes()} passes over it.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class LlmRoutingService {
+
+    /** How long a loaded route list is served from memory before it is read again. */
+    private static final long CACHE_TTL_MS = 3000L;
+    /** Cooldown used when neither the caller nor the route row supplies a positive value. */
+    private static final int DEFAULT_COOLDOWN_SECONDS = 300;
+    /** Longest error text stored on a route row. */
+    private static final int MAX_ERROR_LENGTH = 500;
+    /** Longest response snippet reported by {@link #testRoute(LlmRouteConfig)}. */
+    private static final int MAX_SNIPPET_LENGTH = 300;
+
+    /** Ascending priority, then ascending id; null routes and null fields sort last. */
+    private static final Comparator<LlmRouteConfig> ROUTE_ORDER = (a, b) -> {
+        int byPriority = Integer.compare(priorityOf(a), priorityOf(b));
+        return byPriority != 0 ? byPriority : Long.compare(idOf(a), idOf(b));
+    };
+
+    private final LlmRouteConfigService llmRouteConfigService;
+
+    private volatile List<LlmRouteConfig> allRouteCache = Collections.emptyList();
+    private volatile long cacheExpireAt = 0L;
+
+    /**
+     * Invalidates the cached route list so the next lookup reads it again.
+     *
+     * <p>Holds the same monitor as the reload in {@code loadAllRoutes()}. Without it, an eviction
+     * issued while a reload is running is overwritten when that reload publishes its new expiry,
+     * and rows read before the caller's update stay cached for a whole TTL.
+     */
+    public synchronized void evictCache() {
+        cacheExpireAt = 0L;
+    }
+
+    /** Returns a new list of every configured route, ordered by priority and then id. */
+    public List<LlmRouteConfig> listAllOrdered() {
+        return new ArrayList<>(loadAllRoutes());
+    }
+
+    /**
+     * Returns the routes that may be tried right now, ordered by priority and then id.
+     *
+     * <p>A route is passed over when its status is not 1, when its base URL, API key or model is
+     * blank, or while its cooldown has not expired. If that leaves nothing but some otherwise
+     * usable routes are cooling down, those cooling routes are returned instead.
+     *
+     * @return a new list; empty when no route is both enabled and completely configured
+     */
+    public List<LlmRouteConfig> listAvailableRoutes() {
+        Date now = new Date();
+        List<LlmRouteConfig> ready = new ArrayList<>();
+        List<LlmRouteConfig> cooling = new ArrayList<>();
+        int total = 0;
+        int disabled = 0;
+        int invalid = 0;
+        for (LlmRouteConfig route : loadAllRoutes()) {
+            total++;
+            if (!isEnabled(route)) {
+                disabled++;
+            } else if (isIncomplete(route)) {
+                invalid++;
+            } else if (isCooling(route, now)) {
+                cooling.add(route);
+            } else {
+                ready.add(route);
+            }
+        }
+        if (ready.isEmpty() && !cooling.isEmpty()) {
+            // Every usable route is cooling down: fall back to them instead of offering no route.
+            cooling.sort(ROUTE_ORDER);
+            log.warn("LLM 路由均处于冷却，降级启用冷却路由。cooling={}, total={}", cooling.size(), total);
+            return cooling;
+        }
+        ready.sort(ROUTE_ORDER);
+        if (ready.isEmpty()) {
+            log.warn("未找到可用 LLM 路由。total={}, disabled={}, invalid={}", total, disabled, invalid);
+        }
+        return ready;
+    }
+
+    /**
+     * Records a successful call on a route.
+     *
+     * <p>Increments the success counter, resets the consecutive-failure counter, stamps the
+     * last-used and update times, saves the row and evicts the cache. A null id or an unknown
+     * route does nothing. Exceptions are logged and not rethrown.
+     *
+     * @param routeId id of the route that served the call; may be null
+     */
+    public void markSuccess(Long routeId) {
+        if (routeId == null) {
+            return;
+        }
+        try {
+            LlmRouteConfig route = llmRouteConfigService.selectById(routeId);
+            if (route == null) {
+                return;
+            }
+            Date now = new Date();
+            route.setSuccessCount(nvl(route.getSuccessCount()) + 1);
+            route.setConsecutiveFailCount(0);
+            route.setLastUsedTime(now);
+            route.setUpdateTime(now);
+            llmRouteConfigService.updateById(route);
+            evictCache();
+        } catch (Exception e) {
+            log.warn("更新路由成功状态失败, routeId={}", routeId, e);
+        }
+    }
+
+    /**
+     * Records a failed call on a route and optionally starts a cooldown.
+     *
+     * <p>Increments both failure counters, stores the failure time and a shortened error text,
+     * saves the row and evicts the cache. A null id or an unknown route does nothing. Exceptions
+     * are logged and not rethrown.
+     *
+     * @param routeId id of the route that failed; may be null
+     * @param errorText failure description; may be null
+     * @param enterCooldown whether to set the route's cooldown deadline
+     * @param cooldownSeconds cooldown length; when null or not positive the route's own value is
+     *     used, and 300 seconds when that is not positive either
+     */
+    public void markFailure(Long routeId, String errorText, boolean enterCooldown, Integer cooldownSeconds) {
+        if (routeId == null) {
+            return;
+        }
+        try {
+            LlmRouteConfig route = llmRouteConfigService.selectById(routeId);
+            if (route == null) {
+                return;
+            }
+            Date now = new Date();
+            route.setFailCount(nvl(route.getFailCount()) + 1);
+            route.setConsecutiveFailCount(nvl(route.getConsecutiveFailCount()) + 1);
+            route.setLastFailTime(now);
+            route.setLastError(trimError(errorText));
+            if (enterCooldown) {
+                int seconds = cooldownSeconds != null && cooldownSeconds > 0
+                        ? cooldownSeconds
+                        : defaultCooldown(route.getCooldownSeconds());
+                route.setCooldownUntil(new Date(now.getTime() + seconds * 1000L));
+            }
+            route.setUpdateTime(now);
+            llmRouteConfigService.updateById(route);
+            evictCache();
+        } catch (Exception e) {
+            log.warn("更新路由失败状态失败, routeId={}", routeId, e);
+        }
+    }
+
+    private static int priorityOf(LlmRouteConfig route) {
+        return route == null || route.getPriority() == null ? Integer.MAX_VALUE : route.getPriority();
+    }
+
+    private static long idOf(LlmRouteConfig route) {
+        return route == null || route.getId() == null ? Long.MAX_VALUE : route.getId();
+    }
+
+    private int defaultCooldown(Integer sec) {
+        return sec == null || sec <= 0 ? DEFAULT_COOLDOWN_SECONDS : sec;
+    }
+
+    /** Replaces line breaks with spaces and caps the text at {@code maxLength} chars. */
+    private static String flatten(String text, int maxLength) {
+        String oneLine = text.replace('\n', ' ').replace('\r', ' ');
+        if (oneLine.length() <= maxLength) {
+            return oneLine;
+        }
+        // Back off one char rather than cut between the two halves of a surrogate pair.
+        boolean splitsPair = Character.isHighSurrogate(oneLine.charAt(maxLength - 1));
+        return oneLine.substring(0, splitsPair ? maxLength - 1 : maxLength);
+    }
+
+    private String trimError(String err) {
+        return err == null ? null : flatten(err, MAX_ERROR_LENGTH);
+    }
+
+    private String trimBody(String body) {
+        return flatten(safe(body), MAX_SNIPPET_LENGTH);
+    }
+
+    private int nvl(Integer x) {
+        return x == null ? 0 : x;
+    }
+
+    private boolean isEnabled(LlmRouteConfig c) {
+        return c != null && c.getStatus() != null && c.getStatus() == 1;
+    }
+
+    /** Returns true when the route lacks a base URL, an API key or a model. */
+    private boolean isIncomplete(LlmRouteConfig c) {
+        return isBlank(c.getBaseUrl()) || isBlank(c.getApiKey()) || isBlank(c.getModel());
+    }
+
+    private boolean isCooling(LlmRouteConfig c, Date now) {
+        return c != null && c.getCooldownUntil() != null && c.getCooldownUntil().after(now);
+    }
+
+    /** Returns the cached route list, re-reading it under the instance lock once it has expired. */
+    private List<LlmRouteConfig> loadAllRoutes() {
+        if (System.currentTimeMillis() < cacheExpireAt && allRouteCache != null) {
+            return allRouteCache;
+        }
+        synchronized (this) {
+            // Checked again under the lock so a thread that waited reuses the reload it waited for.
+            if (System.currentTimeMillis() < cacheExpireAt && allRouteCache != null) {
+                return allRouteCache;
+            }
+            EntityWrapper<LlmRouteConfig> wrapper = new EntityWrapper<>();
+            wrapper.orderBy("priority", true).orderBy("id", true);
+            List<LlmRouteConfig> loaded = llmRouteConfigService.selectList(wrapper);
+            if (loaded == null) {
+                loaded = Collections.emptyList();
+            } else {
+                loaded.sort(ROUTE_ORDER);
+            }
+            // The list is published before the expiry, so a fresh expiry implies the new list.
+            allRouteCache = loaded;
+            cacheExpireAt = System.currentTimeMillis() + CACHE_TTL_MS;
+            return loaded;
+        }
+    }
+
+    private String safe(String s) {
+        return s == null ? "" : s.trim();
+    }
+
+    private boolean isBlank(String s) {
+        return s == null || s.trim().isEmpty();
+    }
+
+    /** Returns the exception's trimmed message, or its class name when it has no message. */
+    private String describe(Throwable ex) {
+        return isBlank(ex.getMessage()) ? ex.getClass().getSimpleName() : ex.getMessage().trim();
+    }
+
+    /**
+     * Fills unset fields of {@code cfg} with defaults and stamps its timestamps, in place.
+     *
+     * <p>Defaults: a generated name, thinking 0, priority 100, status 1, switch-on-quota 1,
+     * switch-on-error 1, cooldown 300 seconds (also replacing negative values), counters 0.
+     *
+     * @param cfg route to normalise; must not be null
+     * @param isCreate whether to set the creation time in addition to the update time
+     * @return the same {@code cfg} instance
+     */
+    public LlmRouteConfig fillAndNormalize(LlmRouteConfig cfg, boolean isCreate) {
+        Date now = new Date();
+        if (isBlank(cfg.getName())) {
+            cfg.setName("LLM_ROUTE_" + now.getTime());
+        }
+        if (cfg.getThinking() == null) {
+            cfg.setThinking((short) 0);
+        }
+        if (cfg.getPriority() == null) {
+            cfg.setPriority(100);
+        }
+        if (cfg.getStatus() == null) {
+            cfg.setStatus((short) 1);
+        }
+        if (cfg.getSwitchOnQuota() == null) {
+            cfg.setSwitchOnQuota((short) 1);
+        }
+        if (cfg.getSwitchOnError() == null) {
+            cfg.setSwitchOnError((short) 1);
+        }
+        if (cfg.getCooldownSeconds() == null || cfg.getCooldownSeconds() < 0) {
+            cfg.setCooldownSeconds(DEFAULT_COOLDOWN_SECONDS);
+        }
+        if (cfg.getFailCount() == null) {
+            cfg.setFailCount(0);
+        }
+        if (cfg.getSuccessCount() == null) {
+            cfg.setSuccessCount(0);
+        }
+        if (cfg.getConsecutiveFailCount() == null) {
+            cfg.setConsecutiveFailCount(0);
+        }
+        if (isCreate) {
+            cfg.setCreateTime(now);
+        }
+        cfg.setUpdateTime(now);
+        return cfg;
+    }
+
+    /**
+     * Sends a minimal chat-completion request to the route and reports the outcome.
+     *
+     * <p>Exceptions are caught and reported as {@code ok=false} with {@code statusCode=-1}.
+     *
+     * @param cfg route to probe; its base URL, API key and model are used as given
+     * @return a map with the keys {@code ok}, {@code statusCode}, {@code latencyMs},
+     *     {@code message} and {@code responseSnippet}
+     */
+    public Map<String, Object> testRoute(LlmRouteConfig cfg) {
+        Map<String, Object> result = new HashMap<>();
+        long start = System.currentTimeMillis();
+        try {
+            fillTestResult(result, testJavaRoute(cfg), start);
+        } catch (Exception e) {
+            result.put("ok", false);
+            result.put("statusCode", -1);
+            result.put("latencyMs", System.currentTimeMillis() - start);
+            result.put("message", "测试异常: " + describe(e));
+            result.put("responseSnippet", "");
+        }
+        return result;
+    }
+
+    private void fillTestResult(Map<String, Object> result, TestHttpResult raw, long start) {
+        boolean ok = raw.statusCode >= 200 && raw.statusCode < 300;
+        result.put("ok", ok);
+        result.put("statusCode", raw.statusCode);
+        result.put("latencyMs", System.currentTimeMillis() - start);
+        result.put("message", ok ? "测试成功" : "测试失败");
+        result.put("responseSnippet", trimBody(raw.body));
+    }
+
+    /** Posts a one-message, non-streaming request to {@code /chat/completions} and waits for it. */
+    private TestHttpResult testJavaRoute(LlmRouteConfig cfg) {
+        Map<String, String> ping = new HashMap<>();
+        ping.put("role", "user");
+        ping.put("content", "ping");
+
+        Map<String, Object> request = new HashMap<>();
+        request.put("model", cfg.getModel());
+        request.put("messages", Collections.singletonList(ping));
+        request.put("stream", false);
+        request.put("max_tokens", 8);
+        request.put("temperature", 0);
+
+        WebClient client = WebClient.builder().baseUrl(cfg.getBaseUrl()).build();
+        return client.post()
+                .uri("/chat/completions")
+                .header(HttpHeaders.AUTHORIZATION, "Bearer " + cfg.getApiKey())
+                .contentType(MediaType.APPLICATION_JSON)
+                .accept(MediaType.APPLICATION_JSON, MediaType.TEXT_EVENT_STREAM)
+                .bodyValue(request)
+                .exchangeToMono(resp -> resp.bodyToMono(String.class)
+                        .defaultIfEmpty("")
+                        .map(body -> new TestHttpResult(resp.rawStatusCode(), body)))
+                .timeout(Duration.ofSeconds(12))
+                // Any error raised up to this point, including the timeout, becomes status -1.
+                .onErrorResume(ex -> Mono.just(new TestHttpResult(-1, describe(ex))))
+                .block();
+    }
+
+    /** Status code and body of a test call; status -1 is used when the call ended in an error. */
+    private static class TestHttpResult {
+        private final int statusCode;
+        private final String body;
+
+        private TestHttpResult(int statusCode, String body) {
+            this.statusCode = statusCode;
+            this.body = body;
+        }
+    }
+}
--
Gitblit v1.9.1