diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 744c7803e..6f47bfb65 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -76,6 +76,11 @@ jobs: env: GITHUB_TOKEN: ${{ env.RTK_INSTALL_GITHUB_TOKEN }} + - name: Install and verify DuckDB VSS for Windows + run: | + pnpm run installRuntime:duckdb:vss -- --platform win32 --arch ${{ matrix.arch }} + pnpm run smoke:duckdb:vss -- --platform win32 --arch ${{ matrix.arch }} + - name: Build Windows shell: bash run: | @@ -89,6 +94,13 @@ jobs: VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify packaged DuckDB VSS for Windows + shell: bash + run: | + EXTENSION_PATH="dist/${{ matrix.unpacked }}/resources/app.asar.unpacked/runtime/duckdb/extensions/vss.duckdb_extension" + test -f "$EXTENSION_PATH" + pnpm run smoke:duckdb:vss -- --platform win32 --arch ${{ matrix.arch }} --extension-path "$EXTENSION_PATH" + - name: Verify bundled plugins shell: bash run: | @@ -142,6 +154,11 @@ jobs: # - name: Install Node Runtime # run: pnpm run installRuntime:linux:${{ matrix.arch }} + - name: Install and verify DuckDB VSS for Linux + run: | + pnpm run installRuntime:duckdb:vss -- --platform linux --arch ${{ matrix.arch }} + pnpm run smoke:duckdb:vss -- --platform linux --arch ${{ matrix.arch }} + - name: Build Linux run: | pnpm run build @@ -154,6 +171,13 @@ jobs: VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify packaged DuckDB VSS for Linux + shell: bash + run: | + EXTENSION_PATH="dist/linux-unpacked/resources/app.asar.unpacked/runtime/duckdb/extensions/vss.duckdb_extension" + test -f "$EXTENSION_PATH" + pnpm run smoke:duckdb:vss -- --platform linux --arch ${{ matrix.arch }} --extension-path "$EXTENSION_PATH" + - name: Verify bundled plugins shell: bash run: | @@ -170,15 +194,17 @@ jobs: build-mac: if: 
github.event.inputs.platform == 'all' || contains(github.event.inputs.platform, 'mac') - runs-on: macos-15 + runs-on: ${{ matrix.runner }} strategy: matrix: arch: [x64, arm64] include: - arch: x64 platform: mac-x64 + runner: macos-15-intel - arch: arm64 platform: mac-arm64 + runner: macos-15 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -215,6 +241,11 @@ jobs: env: GITHUB_TOKEN: ${{ env.RTK_INSTALL_GITHUB_TOKEN }} + - name: Install and verify DuckDB VSS for macOS + run: | + pnpm run installRuntime:duckdb:vss -- --platform darwin --arch ${{ matrix.arch }} + pnpm run smoke:duckdb:vss -- --platform darwin --arch ${{ matrix.arch }} + - name: Build Mac run: | pnpm run build @@ -234,6 +265,19 @@ jobs: NODE_OPTIONS: '--max-old-space-size=4096' VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify packaged DuckDB VSS for macOS + shell: bash + env: + TARGET_ARCH: ${{ matrix.arch }} + run: | + APP_DIR="dist/mac/DeepChat.app" + if [ "$TARGET_ARCH" = "arm64" ]; then + APP_DIR="dist/mac-arm64/DeepChat.app" + fi + EXTENSION_BASE64_PATH="${APP_DIR}/Contents/Resources/app.asar.unpacked/runtime/duckdb/extensions/vss.duckdb_extension.b64" + test -f "$EXTENSION_BASE64_PATH" + pnpm run smoke:duckdb:vss -- --platform darwin --arch "$TARGET_ARCH" --extension-base64-path "$EXTENSION_BASE64_PATH" + - name: Verify bundled plugins shell: bash env: diff --git a/.github/workflows/prcheck.yml b/.github/workflows/prcheck.yml index 6ddfd6b9d..64c42f543 100644 --- a/.github/workflows/prcheck.yml +++ b/.github/workflows/prcheck.yml @@ -63,6 +63,11 @@ jobs: - name: Install dependencies run: pnpm install + - name: Install and verify DuckDB VSS + run: | + pnpm run installRuntime:duckdb:vss:linux:x64 + pnpm run smoke:duckdb:vss -- --platform linux --arch x64 + - name: lint run: pnpm run lint diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d93ea4ec4..dc3ec91fc 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -170,6 +170,11 @@ jobs: env: GITHUB_TOKEN: ${{ env.RTK_INSTALL_GITHUB_TOKEN }} + - name: Install and verify DuckDB VSS for Windows + run: | + pnpm run installRuntime:duckdb:vss -- --platform win32 --arch ${{ matrix.arch }} + pnpm run smoke:duckdb:vss -- --platform win32 --arch ${{ matrix.arch }} + - name: Build Windows run: | pnpm run build @@ -183,6 +188,13 @@ jobs: VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify packaged DuckDB VSS for Windows + shell: bash + run: | + EXTENSION_PATH="dist/${{ matrix.unpacked }}/resources/app.asar.unpacked/runtime/duckdb/extensions/vss.duckdb_extension" + test -f "$EXTENSION_PATH" + pnpm run smoke:duckdb:vss -- --platform win32 --arch ${{ matrix.arch }} --extension-path "$EXTENSION_PATH" + - name: Verify bundled plugins shell: bash run: | @@ -235,6 +247,11 @@ jobs: - name: Install dependencies run: pnpm install + - name: Install and verify DuckDB VSS for Linux + run: | + pnpm run installRuntime:duckdb:vss -- --platform linux --arch ${{ matrix.arch }} + pnpm run smoke:duckdb:vss -- --platform linux --arch ${{ matrix.arch }} + - name: Build Linux run: | pnpm run build @@ -248,6 +265,13 @@ jobs: VITE_GITHUB_REDIRECT_URI: ${{ secrets.DC_GITHUB_REDIRECT_URI }} VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify packaged DuckDB VSS for Linux + shell: bash + run: | + EXTENSION_PATH="dist/linux-unpacked/resources/app.asar.unpacked/runtime/duckdb/extensions/vss.duckdb_extension" + test -f "$EXTENSION_PATH" + pnpm run smoke:duckdb:vss -- --platform linux --arch ${{ matrix.arch }} --extension-path "$EXTENSION_PATH" + - name: Verify bundled plugins shell: bash run: | @@ -264,15 +288,17 @@ jobs: build-mac: needs: [resolve-tag, validate-main-ancestor] - runs-on: macos-15 + runs-on: ${{ matrix.runner }} strategy: matrix: arch: [x64, arm64] include: - arch: x64 platform: mac-x64 + runner: 
macos-15-intel - arch: arm64 platform: mac-arm64 + runner: macos-15 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -311,6 +337,11 @@ jobs: env: GITHUB_TOKEN: ${{ env.RTK_INSTALL_GITHUB_TOKEN }} + - name: Install and verify DuckDB VSS for macOS + run: | + pnpm run installRuntime:duckdb:vss -- --platform darwin --arch ${{ matrix.arch }} + pnpm run smoke:duckdb:vss -- --platform darwin --arch ${{ matrix.arch }} + - name: Build Mac run: | pnpm run build @@ -331,6 +362,19 @@ jobs: NODE_OPTIONS: '--max-old-space-size=4096' VITE_PROVIDER_DB_URL: ${{ secrets.CDN_PROVIDER_DB_URL }} + - name: Verify packaged DuckDB VSS for macOS + shell: bash + env: + TARGET_ARCH: ${{ matrix.arch }} + run: | + APP_DIR="dist/mac/DeepChat.app" + if [ "$TARGET_ARCH" = "arm64" ]; then + APP_DIR="dist/mac-arm64/DeepChat.app" + fi + EXTENSION_BASE64_PATH="${APP_DIR}/Contents/Resources/app.asar.unpacked/runtime/duckdb/extensions/vss.duckdb_extension.b64" + test -f "$EXTENSION_BASE64_PATH" + pnpm run smoke:duckdb:vss -- --platform darwin --arch "$TARGET_ARCH" --extension-base64-path "$EXTENSION_BASE64_PATH" + - name: Verify bundled plugins shell: bash env: diff --git a/.github/workflows/windows-arm64-e2e.yml b/.github/workflows/windows-arm64-e2e.yml index 28bafa8cc..c2ef3c7e2 100644 --- a/.github/workflows/windows-arm64-e2e.yml +++ b/.github/workflows/windows-arm64-e2e.yml @@ -44,7 +44,9 @@ jobs: run: pnpm run installRuntime:win:arm64 - name: Verify DuckDB and VSS on Windows arm64 - run: pnpm run smoke:duckdb:vss + run: | + pnpm run installRuntime:duckdb:vss:win:arm64 + pnpm run smoke:duckdb:vss -- --platform win32 --arch arm64 - name: Build Windows arm64 package run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index f508ffb6c..6e63ed78e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v1.0.8-beta.1 (2026-06-29) +- Added the main-window Plugins Hub plus Feishu/Lark install authentication and streaming card delivery for remote 
control +- Upgraded the provider/runtime stack with AI SDK v7, Zod v4 schemas, Electron 40.10.5, DuckDB 1.5.4, and refreshed toolchains +- Fixed New API responses handling and endpoint debug selection, memory first-turn stalls, context-overflow auto-handoff, message-scoped skill activation, and request preview editor layout +- 新增主窗口插件中心,并支持飞书/Lark 安装认证与远程控制流式卡片推送 +- 升级 provider/runtime 栈到 AI SDK v7、Zod v4 schema、Electron 40.10.5、DuckDB 1.5.4,并刷新工具链 +- 修复 New API responses 处理与 endpoint debug 选择、memory 首轮卡顿、context overflow 自动 handoff、按消息激活 skill,以及请求预览编辑器布局 + ## v1.0.7 (2026-06-25) - Added the default chat workspace, task-aware Agent Memory, and persistent agent plan blocks in chat history - Improved Computer Use helper runtime isolation, packaging, shutdown cleanup, and refreshed bundled dependencies and resources diff --git a/docs/architecture/agent-plan-task-refactor/acp-plan-reachability.md b/docs/architecture/agent-plan-task-refactor/acp-plan-reachability.md deleted file mode 100644 index b0bacec4b..000000000 --- a/docs/architecture/agent-plan-task-refactor/acp-plan-reachability.md +++ /dev/null @@ -1,24 +0,0 @@ -# ACP Plan Reachability Audit - -## Summary - -- Subsystem A (`llmProviderPresenter/providers/acpProvider.ts`) is the active ACP provider stream - path. It calls `AcpContentMapper.map(notification)` and pushes `mapped.events` into the provider - `EventQueue`. -- Subsystem A previously dropped `mapped.blocks`. This refactor adds an internal `LLMCoreStreamEvent` - `type:'plan'` variant and accumulator handling that upserts the same shared `type:'plan'` block - shape, so active ACP provider streams can persist plan blocks without inventing an IPC channel. -- Subsystem B (`acpClientPresenter/mapper/AcpEventMapper.ts`) maps `mapped.blocks` to - `content.block` and `mapped.planEntries` to `plan.updated`, but repo grep finds no - `mapSessionUpdate` call site. It is instantiated by `acpClientPresenter/index.ts`, but not proven - live end-to-end. 
-- `MessageBlockPlan` remains the single renderer. `AcpContentMapper.handlePlanUpdate` now uses the - shared plan-block builder, so ACP subsystem A, ACP subsystem B, and the agent-runtime path produce - the same `type:'plan'` block shape. - -## Decision For This Refactor - -- Do not delete `MessageBlockPlan`. -- Keep the new internal `plan` stream event scoped to provider-to-accumulator transport. -- Keep subsystem B's block-capable mapper on the shared builder shape. -- Do not add a public IPC channel or a dedicated plan table for ACP plans. diff --git a/docs/architecture/agent-plan-task-refactor/plan.md b/docs/architecture/agent-plan-task-refactor/plan.md deleted file mode 100644 index 3fced9fb6..000000000 --- a/docs/architecture/agent-plan-task-refactor/plan.md +++ /dev/null @@ -1,300 +0,0 @@ -# Agent Plan / `update_plan` Task — Plan (v4) - -> Active planning doc. Delete after implementation; fold durable facts into -> `docs/architecture/agent-system.md` or `tool-system.md`. v4 broadens the terminal marker to cover -> **every** turn-exit incl. the abort-exception early return (AD6), threads `max_steps` via -> `StreamState`, and clarifies that ACP and agent-runtime share one block-builder helper, not one -> entry point (AD2). 
- -## Involved modules - -| Layer | File | Role today | -| --- | --- | --- | -| Backend tool | `src/main/presenter/toolPresenter/agentTools/agentPlanTool.ts` | `update_plan` def, `states` Map, snapshot/revision | -| Tool wiring | `src/main/presenter/toolPresenter/agentTools/agentToolManager.ts` | agent-mode gating (`isAgentMode`, :373), tool-call routing | -| Prompt | `src/main/presenter/toolPresenter/index.ts` | `buildProgressPrompt` (:641-657) | -| Runtime | `src/main/presenter/agentRuntimePresenter/dispatch.ts` | `markInternalPlanToolCallBlock` (:371), `publishPlanUpdated` (:385), `agent_plan` branch (:916), `finalize` (:1475) / `finalizeError` (:1489) | -| Runtime loop / finalize | `src/main/presenter/agentRuntimePresenter/process.ts` | `while(true)` loop (:327), `MAX_TOOL_CALLS` break→`finalize` (:404), `finalizeError` calls (:95,440,504,512,538), **abort-exception early return (:527-535, bypasses finalize)** | -| Error finalize | `src/main/presenter/agentRuntimePresenter/messageStore.ts` | `buildTerminalErrorBlocks` (:39) flips block `status`→`error` only — does **not** touch `plan_entries[*].status`; interrupted recovery (:615) | -| Runtime lifecycle | `src/main/presenter/agentRuntimePresenter/index.ts` | `destroySession` (:528-554), cancel/abort | -| Runtime state | `src/main/presenter/agentRuntimePresenter/types.ts` | `StreamState` — add `planTerminalReason` | -| ACP (subsystem A) | `llmProviderPresenter/acp/acpContentMapper.ts`, `providers/acpProvider.ts`, `aiSdk/.../accumulator.ts` | builds `type:'plan'` block; `acpProvider` drops `mapped.blocks` | -| ACP (subsystem B) | `acpClientPresenter/mapper/AcpEventMapper.ts` (instantiated at `acpClientPresenter/index.ts:18`) | maps `mapped.blocks`→`content.block`, `mapped.planEntries`→`plan.updated`; **`mapSessionUpdate` has no found call site** | -| Shared types | `src/shared/types/agent-plan.ts`, `src/shared/contracts/events/chat.events.ts`, `src/shared/contracts/acp.ts` | `AgentPlanStepStatus`, snapshot/item, 
event payloads | -| Store | `src/renderer/src/stores/ui/agentPlan.ts` | in-memory `snapshots`, persisted `collapsedBySession`, revision gate (:12) | -| UI (live) | `src/renderer/src/components/chat/AgentProgressFloat.vue` | the float in the screenshot | -| UI (persisted) | `src/renderer/src/components/message/MessageBlockPlan.vue` | `type:'plan'` renderer (`MessageItemAssistant.vue:69`); spins on `in_progress` (:139) | -| Wiring | `src/renderer/src/pages/ChatPage.vue` | `onPlanUpdated` (:1860), dismiss (:999), stop/retry/continue (:1736/1746/1797) | -| i18n | `src/renderer/src/i18n/*/chat.json` (`chat.workspace.plan.*`) | labels incl. dead `failed`/`skipped` | - -## Architecture decisions - -### AD1 — Single persisted representation = the `type:'plan'` block (D1, D4) -The persisted, in-history plan is a `type:'plan'` block rendered by `MessageBlockPlan.vue`. The live -`AgentProgressFloat` is a transient overlay during active generation; on reload it rehydrates from -the latest persisted plan block of the conversation. The hidden `update_plan` tool-call block stays -transport/provenance only (`extra.internalTool=true`, not double-rendered). - -The agent-runtime path **projects each `update_plan` snapshot into a persisted `type:'plan'` -block**. This intentionally changes the contract asserted by `dispatch.test.ts:299` ("does not -insert plan blocks"); that test is rewritten to assert the upsert behavior. - -**Upsert identity & position.** There is **at most one `type:'plan'` block per assistant message**. -The producer locates it by scanning the current turn's block stream (`state.blocks`) for a -`type:'plan'` block: if present it mutates that block in place; otherwise it inserts one -**immediately after the first (hidden) `update_plan` tool-call block** of the turn, then mutates that -same block in place for every later revision. Later revisions never move or duplicate it. 
Identity is -"the lone `type:'plan'` block within the active message" — **not** a `toolCallId` (which differs per -`update_plan` call) and **not** `messageId` lookups across the store. Because each turn (including -retry/continue) builds a fresh `state.blocks` for a new assistant message, a new turn yields a new -plan block — there is no cross-turn / cross-message overwrite. - -Why not "rehydrate the float from the hidden tool-call params" (rejected D4 alternative): it keeps -two renderers interpreting different sources and leaves `MessageBlockPlan` half-dead. One -`type:'plan'` block consumed by one renderer is the lower-divergence design and gives the ACP path a -home (AD2). - -### AD2 — Converge ACP and agent-runtime on one block shape + one builder (D2), after an audit -`AcpContentMapper.handlePlanUpdate` already builds a `type:'plan'` block. The agent-runtime -`update_plan` path and the ACP plan-notification path are **necessarily two entry points** — that is -fine. The constraint is: both must call **one shared plan-block construction/normalization helper** -that produces **one `type:'plan'` block shape**, rendered by the **single `MessageBlockPlan`** -renderer. Different entry points, one builder, one shape, one renderer. - -The audit (T1) establishes which ACP subsystem is live end-to-end: -- If subsystem B (`AcpEventMapper` → `content.block`) is the real path, ensure a `content.block` - carrying a `type:'plan'` block reaches the persisted message stream and renders via - `MessageBlockPlan`. -- If subsystem A (`acpProvider`) is the real path, it currently drops `mapped.blocks`; wire the - `type:'plan'` block through (and add a `'plan'` accumulator case if the persisted stream is rebuilt - there). -Either way the **renderer stays `MessageBlockPlan`** and both paths share the builder helper. No -deletion. 
- -### AD3 — Store models server-state and view-state separately, baseline per-turn (C1) -- `snapshots[sessionId]` is pure server-state for the **current turn's** live overlay. Add - `beginTurn(sessionId)` that resets the baseline (clears the live snapshot) — called from - submit/steer/retry/continue. The revision gate then only orders within-turn updates. -- Replace the boolean collapse map with per-session view-state `{ collapsed, dismissedRevision }` - (persisted). `dismiss` sets `dismissedRevision = current.revision` (sticky) instead of deleting. -- `freezeActive(sessionId)` (for `onStop`): set the live overlay's terminal indicator so the spinner - stops immediately. This is the **live mirror** of the persisted terminal marker (AD6); the source - of truth for reload is the stamped block, not this call. -- Float visibility = `snapshot exists && revision > dismissedRevision && entries.length > 0`. - Default `collapsed=false` on first appearance (AC19); auto-collapse (not delete) when - `completedCount === total` (AC6). -- `purge(sessionId)` removes the live snapshot + persisted view-state key (on conversation delete / - `destroySession`). - -### AD4 — One shared status presentation module (AC13) -`src/renderer/src/composables/useAgentPlanStatus.ts` exports `STATUS_ICON`, `STATUS_ICON_CLASS`, -`STATUS_BADGE_CLASS`, `entryAriaLabel(t, status, step)`, and a `resolveStepPresentation(status, -{ terminal })` helper that returns a **non-spinning** interrupted indicator for `in_progress` when -the plan is terminal (AD6). Both renderers import it; the completed-step decision is made once (mute -icon, keep text at `text-foreground` for AA — AC17). `MessageBlockPlan`'s ad-hoc `normalizeStatus`/ -`done`→`completed` tolerance moves here as a shared `normalizePlanEntry`. 
- -### AD5 — Status enum single source (AC12) -In a shared runtime-capable module: `export const agentPlanStepStatusSchema = z.enum(['pending', -'in_progress','completed'])`, `agentPlanItemSchema = z.object({ step, status })`, -`type AgentPlanStepStatus = z.infer<...>`. `agentPlanTool.ts` and `chat.events.ts` import these. -Remove `failed`/`skipped` i18n (AC21). The step-status enum stays three values (see AD6 for the -block-level terminal marker). - -### AD6 — Persistable terminal marker for abnormal/error termination (AC4) -The step-status enum is unchanged (AD5). To represent a turn that ended while a step was still -`in_progress`, add a **block/snapshot-level** field `terminalReason?: 'aborted' | 'max_steps' | -'error'`, persisted into the `type:'plan'` block `extra` (e.g. `plan_terminal_reason`) — additive, no -enum change (C3). - -**Crucial: cover every turn-exit, not just `finalizeError`/`finalize`.** Three exits can leave an -open `in_progress` step: -1. `finalizeError` (`dispatch.ts:1489`) — the error/cancel chokepoint reached from user cancel - (`process.ts:95`), tool terminal error (`:440`), context-window error (`:504`), no-model-response - (`:512`), a **non-abort** uncaught exception (`:538`), plus interrupted-session recovery - (`messageStore.ts:615`). Its `buildTerminalErrorBlocks` (`messageStore.ts:39`) only flips block - `status`→`error`; it does **not** touch `extra.plan_entries[*].status`. -2. The normal `finalize` (`dispatch.ts:1475`) after the `MAX_TOOL_CALLS` `break` (`process.ts:404`). -3. **The abort-exception early-return branch (`process.ts:527-535`)** — when `abortSignal.aborted || - isAbortError(err)`, the catch `return`s `{status:'aborted'}` **without calling `finalize` or - `finalizeError`**. Easy to miss; would leave the plan spinning on reload. - -Without handling all three, a persisted plan block keeps its `in_progress` entry and **reloads -spinning** — violating "no step spins after its turn ended". 
- -Implementation: add `state.planTerminalReason?: 'aborted' | 'max_steps' | 'error'` to `StreamState` -(`types.ts`); set it `= 'max_steps'` immediately before the `break` at `process.ts:404`. Introduce -one **idempotent** helper `stampPlanTerminalIfOpen(state, io, reason)` that finds the latest -`type:'plan'` block and, if any entry is still `in_progress` (and not already stamped), sets -`extra.plan_terminal_reason` and emits one final `chat.plan.updated`. Call it from: `finalize` -(reason = `state.planTerminalReason`, i.e. `max_steps`), `finalizeError` (reason `aborted` for -USER_CANCELED, else `error`), **and the abort-exception catch branch before its `return`** (reason -`aborted`). Idempotency makes a redundant call (e.g. an outer cancel path also invoking -`finalizeUserCanceledErrorIfNeeded`, `process.ts:90`) harmless. The shared presentation (AD4) renders -an `in_progress` step as a **static, non-spinning interrupted indicator** whenever `terminalReason` -is set. Normal, well-closed completion needs no marker (R7). `freezeActive` mirrors this in the store -for instant live feedback before persistence round-trips. - -**Persistence boundary (so reload, not just live, is fixed).** DB writes happen only in the finalize -family — `finalize` → `updateAssistantContent` (`dispatch.ts:1449`) / `finalizeAssistantMessage` -(`:1475`); `finalizeError` → `setMessageError` (`:1504`). Streaming itself only flushes to the -renderer (`flushBlocksToRenderer`, no DB write). Therefore: (a) at `finalize`/`finalizeError`, call -`stampPlanTerminalIfOpen` **before** the messageStore write so the stamp is persisted; (b) the -abort-exception early-return branch (`process.ts:527-535`) runs **no** finalize and **no** DB write — -after stamping it must itself persist (`messageStore.updateAssistantContent(io.messageId, -state.blocks)` + `flushBlocksToRenderer`) before returning. Without (b), "live not spinning" is fixed -but **"reload not spinning" is not**. 
- -Sequencing note: in Increment 1 (pre-persistence) `freezeActive` only stops the **live** spinner — -acceptable because there is no persisted plan block yet to reload. The persistable stamp + reload -non-spin lands in Increment 2, after the `type:'plan'` block exists (T5). - -## Event & data flow (target) - -``` -update_plan(call) - → AgentPlanTool: validate, build snapshot (revision = within-turn monotonic) - → onProgress(agent_plan) [single transport; drop toolResult.snapshot] - → dispatch.applyProgressUpdate (agent_plan, allowProgressUpdates): - • upsert THE type:'plan' block of this turn (the lone one in state.blocks) [NEW] - plan_entries / plan_explanation / plan_revision / plan_updated_at - (inserted right after the first hidden update_plan tool-call block) - • keep update_plan tool-call block extra.internalTool=true (provenance, hidden) - • publishDeepchatEvent('chat.plan.updated', payload) (live) - → ChatPage.onPlanUpdated → agentPlanStore.applySnapshot - -turn start (submit/steer/retry/continue): - → agentPlanStore.beginTurn(sessionId) [NEW: reset per-turn baseline → 0] - -any turn-exit with an open in_progress step → stampPlanTerminalIfOpen(state, io, reason) [NEW, idempotent] - • finalizeError (stop / tool-error / context-window / no-response / non-abort exception / interrupted) → error | aborted - • finalize after MAX_TOOL_CALLS break (state.planTerminalReason='max_steps' set before break) → max_steps - • abort-exception catch branch (process.ts:527-535) BEFORE its early return → aborted - (no finalize on this path → must also persist: updateAssistantContent + flushBlocksToRenderer) - → stamps latest type:'plan' block extra.plan_terminal_reason + one final chat.plan.updated - (at finalize/finalizeError: stamp BEFORE the messageStore write so the stamp is persisted) - → agentPlanStore.freezeActive(sessionId) [live mirror; non-spinning indicator] - -session load / reopen / switch: - → from loaded messages, take the latest type:'plan' block → 
agentPlanStore.applySnapshot [NEW] - (history always renders inline via MessageBlockPlan; float overlay optional) - -destroySession(sessionId) / conversation delete: - → planTool.clearState(sessionId) AND agentPlanStore.purge(sessionId) [NEW] -``` - -ACP path converges on the same `type:'plan'` block via the shared builder (AD2) after the audit. - -## Compatibility & migration - -- `type:'plan'` blocks and `block.extra` plan fields (`plan_entries`, `plan_terminal_reason`, …) are - additive; pre-change conversations have no plan block → rehydrate to "no plan" (C3). -- `collapsedBySession` localStorage (`agent-plan-collapsed`) gains a richer value shape. Per C4 / - project preference (no compat shims unless required), the decision is to **rename the key** to - `agent-plan-view-state` and one-time-prune the legacy `agent-plan-collapsed` key on first read, - rather than ship a legacy-boolean translation shim. -- **Backward-compatible typed-event extension** (not "no change"): `chat.plan.updated` gains an - optional `terminalReason`. Update the event-contract zod payload in `chat.events.ts:47-57` (add - `terminalReason: z.enum(['aborted','max_steps','error']).optional()`); `AgentPlanViewSnapshot = - DeepchatEventPayload<'chat.plan.updated'>` (`agentPlan.ts:6`) then derives the new field - automatically, and the contract's `defineEventContract` test must be updated. No route/IPC channel - change. If `AgentPlanViewSnapshot` is later re-expressed as `AgentPlanSnapshot + messageId`, that - is type-only. - -## Test strategy - -- **Main (Vitest):** - - `dispatch.test.ts:299` rewritten: `applyProgressUpdate` upserts the single `type:'plan'` block - of the turn (idempotent across revisions, never duplicated) and still publishes the event. 
- - abnormal/error termination: `finalizeError` (cancel, tool error, context-window, no-response, - non-abort exception), the `MAX_TOOL_CALLS` `finalize`, AND the **abort-exception early-return - branch** (plan written, then provider throws `AbortError`) each stamp `plan_terminal_reason` - (`aborted`/`max_steps`/`error`) on the latest plan block when a step is open and emit a final - event; for the abort-exception branch assert the stamp is **persisted to messageStore** (a - reload sees the non-spinning state), not just emitted. Assert no `in_progress` entry survives - unstamped after any such ending, and the helper is idempotent. - - `agentPlanTool`: clearState wiring; missing-`toolCallId` behavior; revision monotonic within turn. - - subagent path leaves no orphan state. - - ACP: per audit outcome, a `type:'plan'` block is produced/renderable through the live path via - the shared builder. -- **Renderer (Vitest + VTU):** - - store: per-turn `beginTurn` baseline (the **C1 guard**: clear → next `revision=1` must render), - `dismiss` sticky, `freezeActive`, `purge`, auto-collapse. - - presentation: `in_progress` with `terminalReason` set renders **without** `animate-spin` (live - float and inline block); completed-step uses `text-foreground` (not 50%-alpha muted); steps - container has `aria-live`. - - `AgentProgressFloat`: stop/retry/complete transitions leave no spinning `in_progress`; badge - i18n renders per-locale (`en`, `ja`) without concatenation artifacts. - - rehydration: a loaded conversation with a persisted plan block shows the plan (and a frozen one - does not spin); switching sessions isolates plans. - -## Review follow-up fixes - -The post-implementation review found one real live-path regression and several missing regression -tests. These are implemented as a follow-up to this same architecture goal. 
- -- Live terminal updates keep the existing tool revision, but `agentPlanStore.applySnapshot` must - accept a same-revision snapshot when it adds or changes `terminalReason`; otherwise the live float - can keep spinning until reload even though the persisted inline block is correct. -- `dismiss` is sticky for the whole active turn, not just the current revision. `beginTurn` resets - the dismissed flag. -- Store read paths (`isVisible`, `isCollapsed`) are pure and do not create persisted view-state keys. -- Rehydration scans loaded messages from newest to oldest and stops at the first persisted plan - block. -- Follow-up tests cover the subagent/no-progress path, process-level `max_steps` and abort-exception - wiring, same-revision terminal updates, sticky dismiss, pure getters, and session switch - rehydration isolation. - -## Second review follow-up fixes - -The second review found a real queue-path regression in the first follow-up: `dismiss` became -session-scoped while `beginTurn` is only called by renderer user handlers. Main-process automatic -pending-queue drain starts a new assistant turn without calling `beginTurn`, so the previous turn's -dismissed state can hide the next turn's live float. - -- `dismiss` is keyed to the current snapshot `messageId`, so it remains sticky for the active turn - but cannot leak into a later auto-drained turn. -- `agentPlanStore.applySnapshot` treats a changed `messageId` as a new turn boundary and accepts the - snapshot even when its revision is lower than the previous turn's last revision. Within the same - `messageId`, revision monotonicity remains intact and only same-revision terminal reason changes - are accepted. -- Main exposes a narrow `clearAgentPlanState(sessionId)` path and calls it after creating a new - assistant message. This resets backend `update_plan` revision state for user-initiated turns and - main auto-drained queue turns without clearing tool mappings. 
-- Tests cover the no-`beginTurn` auto-queue-visible store path, same-revision non-terminal drops, - direct plan-state reset wiring, auto-queue reset calls, and direct persisted-plan rehydration - helper behavior. - -## Third review follow-up fixes - -The third review found no functional blocker, but it identified two test guards that were still too -weak and two renderer view-state edge cases. - -- `finalizeError` tests must capture the `setMessageError` write-time blocks with `structuredClone` - and assert the persisted plan block already has `plan_terminal_reason: 'error'`. -- Renderer store tests must assert the actual `useStorage` value shape so `purge(sessionId)` cannot - become a no-op while tests stay green. -- `agentPlanStore.applySnapshot` resets collapsed state when a changed `messageId` establishes a new - live turn, so auto-drained queue turns appear expanded even when the previous turn was dismissed. -- Auto-collapse runs only when the same message transitions from not-all-completed to all-completed; - rehydration or repeated completed snapshots must not override a user's manual expansion. - -## Risks - -- **AD1 contract change** (`dispatch.test.ts:299`) is deliberate but touches a persisted-block - invariant; keep it isolated to one PR with the rewritten test + a rehydration test. The upsert - identity ("lone `type:'plan'` block in `state.blocks`") must be covered so revisions never - duplicate the block. -- **C1 guard** (fresh-`revision=1`-dropped) is the highest-risk regression; the store test must - reproduce "beginTurn → revision 1 → renders". -- **AD6 main-side stamping** must hook **every** turn-exit: `finalizeError`, the `MAX_TOOL_CALLS` - `finalize`, AND the **abort-exception early-return branch (`process.ts:527-535`) which bypasses - both** — or a reloaded plan still spins. 
Three traps: (1) `buildTerminalErrorBlocks` not touching - `plan_entries` status; (2) the abort branch returning before any finalize; (3) **DB writes living - only in the finalize family** — so the abort branch must persist (`updateAssistantContent` + flush) - after stamping, else "live" is fixed but "reload" is not. Cover each trigger (cancel, - abort-exception, tool-error, exception, max-steps) in tests **including a reload/persistence - assertion**; make the helper idempotent. -- **AD2 audit** may reveal subsystems A/B are both partially wired; resolve to one before touching - the hot path. Audit is its own task (T1), output recorded in this folder. -- Scope: R6 items are independent low-risk cleanups — separate commits so R1/R2 stay reviewable. diff --git a/docs/architecture/agent-plan-task-refactor/tasks.md b/docs/architecture/agent-plan-task-refactor/tasks.md deleted file mode 100644 index fa9645355..000000000 --- a/docs/architecture/agent-plan-task-refactor/tasks.md +++ /dev/null @@ -1,161 +0,0 @@ -# Agent Plan / `update_plan` Task — Tasks (v4) - -> Implemented. Decisions D1–D4 are hard-resolved in `spec.md` (D4: agent-mode history -> shows an inline `type:'plan'` block — settled). Ordered so the cheap terminal-state wins ship -> before the persistence refactor. Each task is one reviewable commit/PR. - -## Increment 0 — De-risk (must precede Increment 2) - -- [x] **T1 — ACP reachability audit (R5/AC14).** Trace both ACP subsystems end-to-end: does a - `type:'plan'` block today reach the persisted message stream and render via `MessageBlockPlan`? - Cover subsystem A (`acpProvider.handleSessionUpdate`, which drops `mapped.blocks`) vs subsystem - B (`AcpEventMapper`, instantiated at `acpClientPresenter/index.ts:18` but whose - `mapSessionUpdate` has **no found call site**). Record findings in - `acp-plan-reachability.md`. Output decides AD2 wiring (T8). 
**No deletion of `MessageBlockPlan`.** - -## Increment 1 — Stop the "stuck spinner" (no persistence, immediate value) - -- [x] **T2 — Prompt closure discipline (R7/AC22).** Extend `buildProgressPrompt`: reconcile every - step before finishing, never end a turn with a dangling `in_progress`. Mirrors Codex's - plan-closure rule. (+ prompt snapshot test.) -- [x] **T3 — Per-turn baseline + live freeze/rebaseline transitions (R2/AC4 live, AC5; C1).** In the - store add `beginTurn(sessionId)` (reset baseline → 0) and `freezeActive(sessionId)` (stops the - **live** spinner). Wire `beginTurn` into `onSubmit`/`onSteer`/`onMessageRetry`/ - `onMessageEditSave`/`onMessageContinue`; wire `freezeActive` into `onStop`. **No blanket - delete.** Scope: this only fixes the live in-session spinner; the persistable terminal marker - (reload) is T6. (+ renderer test: stop mid-plan → live float no longer spins; retry → clean - overlay.) -- [x] **T4 — Auto-collapse + sticky dismiss + default-expanded (R2/AC6,AC7; AC19).** View-state - `{ collapsed, dismissedRevision }`; default `collapsed=false` first appearance; auto-collapse - when all complete; `dismiss` sets `dismissedRevision`. (+ store test.) - -## Increment 2 — Persisted plan block + terminal state + rehydration (depends on T1; D1/D2/D4) - -- [x] **T5 — Upsert THE `type:'plan'` block per turn (R1/AD1).** In `dispatch.applyProgressUpdate`, - upsert the single `type:'plan'` block of the turn — locate the lone `type:'plan'` block in - `state.blocks`, mutate in place, else insert it **immediately after the first (hidden) - `update_plan` tool-call block**; carry - `plan_entries/plan_explanation/plan_revision/plan_updated_at`; keep the tool-call block - `internalTool=true`. **Rewrite `dispatch.test.ts:299`** to assert the upsert (idempotent across - revisions, never duplicated) + event. (+ test.) 
-- [x] **T6 — Persistable terminal marker on every turn-exit (R2/AC4 reload; AD6).** Add - `state.planTerminalReason?: 'aborted'|'max_steps'|'error'` to `StreamState` (`types.ts`); set - it `= 'max_steps'` right before the `break` at `process.ts:404`. Add an **idempotent** helper - `stampPlanTerminalIfOpen(state, io, reason)` that stamps `plan_terminal_reason` onto the latest - `type:'plan'` block (only when a step is still `in_progress`) and emits one final - `chat.plan.updated`. Call it from **all three exits**: `finalizeError` (`dispatch.ts:1489` — - covers cancel `process.ts:95`, tool error `:440`, context-window `:504`, no-response `:512`, - non-abort exception `:538`, interrupted recovery `messageStore.ts:615`), the `finalize` - (`:1475`) after `MAX_TOOL_CALLS`, **and the abort-exception catch branch (`process.ts:527-535`) - before its early `return`** (reason `aborted`). **Persistence boundary:** DB writes live only - in the finalize family (`updateAssistantContent` :1449 / `finalizeAssistantMessage` :1475 / - `setMessageError` :1504), so call the helper **before** those writes in `finalize`/ - `finalizeError`, and in the abort-exception branch **persist after stamping** - (`messageStore.updateAssistantContent` + `flushBlocksToRenderer`) since it has no finalize. - (`buildTerminalErrorBlocks` flips block status only — never rely on it for entry status.) - **Extend the event contract** `chat.events.ts:47-57` - with optional `terminalReason: z.enum(['aborted','max_steps','error'])` (+ update the - `defineEventContract` test; `AgentPlanViewSnapshot` derives it). Render `in_progress` - **without** `animate-spin` when terminal — directly in `MessageBlockPlan.vue` + - `AgentProgressFloat.vue` for now (consolidated into the composable by T13). 
(+ main tests: - cancel / **abort-exception (plan written → provider throws `AbortError` → `aborted`, - asserted persisted to messageStore, not just emitted)** / tool-error / exception / max-steps; - + renderer test: reload after an error/abort ending → no spin.) -- [x] **T7 — Rehydrate live float from persisted block on load/switch (R1/AC1–AC3; C1).** On - `loadMessages` / sessionId switch, take the latest `type:'plan'` block and - `agentPlanStore.applySnapshot`; rely on `beginTurn` (T3) so a subsequent live turn rebaselines - cleanly. Per-conversation isolation. (+ renderer rehydration + switch-isolation tests, incl. - the C1 guard.) -- [x] **T8 — Converge ACP onto the same block (R5/AC15; AD2).** Per T1's outcome, fix the ACP - producer/transport so its `type:'plan'` block renders via `MessageBlockPlan`; remove the - divergent/dead branch (producer side only — renderer stays). (+ test for the live ACP path.) - -## Increment 3 — Backend hygiene - -- [x] **T9 — Bound `states` + purge renderer (R3/AC8).** Wire `planTool.clearState(sessionId)` into - `destroySession` and `agentPlanStore.purge` on conversation delete. Backend revision may stay - process-local (safe under C1). (+ main test.) -- [x] **T10 — Remove dead surface (R3/AC9).** Drop `rawData.toolResult.snapshot` (only `onProgress` - consumed); remove `getState`/`clearState` if T9 leaves them unused. Document `onProgress` as - sole transport. -- [x] **T11 — Subagent orphan-key + missing-`toolCallId` (R3/AC10,AC11).** Stop subagent - `update_plan` from polluting the parent `states` Map; treat a missing `toolCallId` as an error - or logged drop, not silent success. (+ main tests.) - -## Increment 4 — Contracts & DRY - -- [x] **T12 — Status enum single source (R4/AC12; AD5).** `agentPlanStepStatusSchema` / - `agentPlanItemSchema` once in shared; import in tool schema + event contract; remove - re-declarations and the unreachable `failed`/`skipped` i18n (AC21). 
-- [x] **T13 — Shared status presentation composable (R4/AC13; AD4).** Extract - `useAgentPlanStatus.ts` (+ `normalizePlanEntry` + `resolveStepPresentation` incl. the terminal - non-spin rule from T6); both renderers consume it; unify completed styling. - -## Increment 5 — UX / i18n / a11y polish (independent, low-risk) - -- [x] **T14 — Parameterized completed counter (R6/AC16).** One pluralizable - `chat.workspace.plan.completedCount` across locales; float + inline badge consistent. - (+ per-locale render test.) -- [x] **T15 — Contrast + a11y (R6/AC17,AC18).** Completed-step text at `text-foreground` (mute icon - only); `aria-live="polite"`/`role="status"`; single disclosure control with `aria-expanded` + - `aria-controls`; drop the redundant chevron tab stop. -- [x] **T16 — Prune persisted view-state (R6/AC20).** GC the renamed `agent-plan-view-state` key on - conversation deletion (same flow as T9); one-time prune the legacy `agent-plan-collapsed` key. - -## Increment 6 — Review follow-up fixes - -- [x] **T17 — Accept same-revision terminal updates.** Keep terminal stamps on the existing plan - revision, but let `agentPlanStore.applySnapshot` accept a same-revision snapshot that adds or - changes `terminalReason`, so the live float does not keep spinning after `max_steps`/error. -- [x] **T18 — Make dismiss turn-sticky and store getters pure.** Replace revision-based dismiss - gating with a turn-scoped `dismissed` flag reset by `beginTurn`; make `isVisible` and - `isCollapsed` pure reads that do not create localStorage entries. -- [x] **T19 — Tighten rehydration.** Rename the store clear API to `clearSnapshot`, update - `ChatPage`, and scan loaded messages from newest to oldest, stopping at the first persisted - `type:'plan'` block. -- [x] **T20 — Backend cleanup.** Reduce `AgentPlanState` to the revision value that remains in use - and share the canonical `update_plan` tool-name constant with the shared block helper. 
-- [x] **T21 — Add runtime regression tests.** Cover subagent/no-progress isolation, process-level - `MAX_TOOL_CALLS` terminal stamping, abort-exception persistence, and terminal-stamp - idempotency with cloned messageStore write assertions. -- [x] **T22 — Add renderer regression tests.** Cover same-revision terminal acceptance, backend - terminal reason overriding optimistic freeze, sticky dismiss through later revisions, pure - getters, and session switch rehydration isolation. - -## Increment 7 — Second review follow-up fixes - -- [x] **T23 — Make dismiss message-scoped.** Store the dismissed `messageId` instead of a - session-level boolean; keep dismiss sticky for the current turn and allow the next auto-drained - turn to show its live float without requiring renderer `beginTurn`. -- [x] **T24 — Treat changed `messageId` as a new live plan turn.** Let `agentPlanStore.applySnapshot` - accept a lower/equal revision when `messageId` changes, while preserving same-message revision - monotonicity and same-revision terminal-only updates. -- [x] **T25 — Reset backend plan state at new assistant turn creation.** Add - `clearAgentPlanState(sessionId)` as a narrow ToolPresenter method and call it after - `createAssistantMessage`, covering user sends and main auto-drained queue turns without - clearing tool mappings. -- [x] **T26 — Add second-review regression tests.** Cover no-`beginTurn` next-message visibility, - same-revision non-terminal drops, narrow clear-state wiring, auto-queue reset calls, and direct - `snapshotFromAgentPlanBlock` hydration behavior. - -## Increment 8 — Third review follow-up fixes - -- [x] **T27 — Strengthen error stamp persistence guard.** Capture `setMessageError` blocks at call - time and assert ordinary `finalizeError` writes already include `plan_terminal_reason: 'error'`. 
-- [x] **T28 — Strengthen renderer purge view-state guard.** Make `agentPlanStore` tests assert the - real `useStorage` value shape so `purge(sessionId)` must delete persisted view-state. -- [x] **T29 — Keep new-message live plans expanded.** Reset collapsed state when `applySnapshot` - accepts a changed `messageId`, covering main auto-drained queue turns that do not call - `beginTurn`. -- [x] **T30 — Make auto-collapse transition-only.** Auto-collapse only when the same message moves - from not-all-completed to all-completed, not during rehydration or repeated completed updates. - -## Sequencing notes - -- **T1 first** — resolves the only remaining ambiguity (ACP reachability) and unblocks T5/T8. -- Increment 1 (T2–T4) ships independently, no persistence, fixes the most visible **live** symptom; - safe before T5 because freezing/rebaselining the live overlay does not touch persisted history. -- T5 carries a deliberate test-contract change; T6 (terminal marker) and T7 (rehydration) must land - with it so reload never shows a spinning or vanished plan. T7 must include the C1 guard test. -- Increments 3–5 are cleanup; interleave once Increment 2's contracts are settled. T13 absorbs the - inline terminal-render added directly in T6. diff --git a/docs/architecture/zod-v4-upgrade/plan.md b/docs/architecture/zod-v4-upgrade/plan.md new file mode 100644 index 000000000..45eb31c51 --- /dev/null +++ b/docs/architecture/zod-v4-upgrade/plan.md @@ -0,0 +1,72 @@ +# Zod 4 Native Migration Plan + +## Approach + +1. Upgrade the dependency to `zod@^4.4.3`, remove `zod-to-json-schema` and the + unused `@vee-validate/zod`, and refresh the lockfile. +2. Add `src/shared/lib/zodJsonSchema.ts` as the shared wrapper around + `z.toJSONSchema(schema, { io: 'input', unrepresentable: 'throw' })`, returning + the object schema shape required by project tool consumers. +3. Replace every `zodToJsonSchema(...)` call with the new helper. +4. 
Migrate Zod 3 legacy and deprecated APIs in source code: + - `.strict()` -> `z.strictObject(...)` + - `.passthrough()` -> `z.looseObject(...)` + - `.strip()` -> plain `z.object(...)` + - `.merge(...)` -> `.extend(...)` or shape spread + - `z.nativeEnum(...)` -> `z.enum(...)` + - one-argument `z.record(...)` -> `z.record(z.string(), ...)` + - `z.string().url()` -> `z.url()` + - `error.errors` -> `error.issues` + - `error.flatten()` -> `z.flattenError(error)` +5. Add migration-focused tests under the main test suite. +6. Harden provider-facing JSON Schema output so root schemas remain plain object + schemas without root composition or dialect keys. + +## Affected Interfaces + +- Dependency surface: `package.json`, `pnpm-lock.yaml`. +- Shared helper: `src/shared/lib/zodJsonSchema.ts`. +- Zod schema definitions in shared contracts, main presenter tools, MCP in-memory servers, browser tool definitions, scheduled task normalization, hooks notifications, remote-control types. +- Tests validating Zod 4 migration behavior. + +## Compatibility + +- IPC route/event names and payload fields remain unchanged. +- Existing tool and MCP schemas remain object schemas with `type`, `properties`, and optional `required`. +- Zod 4 native JSON Schema conversion runs in input mode so defaults/coercions are described as accepted input rather than post-parse output. +- Provider-facing root tool schemas must not expose root `$schema`, `$defs`, + `$ref`, `oneOf`, `anyOf`, or `allOf`; safe root metadata such as + `description` may be preserved, and nested property schemas may still use JSON + Schema composition when needed to represent a real field-level union. +- Root `allOf` schemas produced by Zod intersections are rejected fail-fast + because flattening them into a provider-facing object schema can weaken + intersection semantics. 
+- AI SDK mapper normalization preserves root-level shared `properties` and + `required` keys when externally supplied tool schemas combine a root object + with root `oneOf`, `anyOf`, or `allOf` branches. +- Loose schemas intentionally preserve unknown keys; plain object schemas intentionally strip unknown keys; and strict schemas intentionally reject unknown keys. + +## Test Strategy + +- Focused tests: + - JSON Schema helper representative output. + - strict object rejects extra keys. + - loose object preserves extra keys. + - plain object strips extra keys. + - default/optional tool args behavior. + - recursive JSON value record parsing. + - clean provider-facing root schemas for object unions and nullable objects. + - rejection of unsupported top-level records, mixed object/non-object unions, + root intersections, and unrepresentable schema members. + - AI SDK mapper preservation of shared root properties and required keys when + normalizing composed external object schemas. +- Targeted commands: + - `pnpm run test:main -- test/main/routes/contracts.test.ts` + - `pnpm run test:main -- test/main/presenter/toolPresenter` + - `pnpm run test:main -- test/main/presenter/mcpPresenter` +- Final commands: + - `pnpm run format` + - `pnpm run i18n` + - `pnpm run lint` + - `pnpm run typecheck` + - `pnpm test` diff --git a/docs/architecture/zod-v4-upgrade/spec.md b/docs/architecture/zod-v4-upgrade/spec.md new file mode 100644 index 000000000..66e038bac --- /dev/null +++ b/docs/architecture/zod-v4-upgrade/spec.md @@ -0,0 +1,58 @@ +# Zod 4 Native Migration + +## User Need + +The user wants DeepChat to upgrade Zod from 3.x to the current stable 4.x release +and adopt the recommended Zod 4 APIs instead of doing a minimal compatibility +upgrade. + +## Goal + +Upgrade to `zod@^4.4.3`, remove Zod 3 legacy and deprecated API usage, and keep +the existing IPC contracts, MCP/tool schemas, persisted data formats, and business +behavior stable.
+ +## Acceptance Criteria + +- `package.json` uses `zod@^4.4.3` and no longer declares direct dependencies on + `zod-to-json-schema` or the unused `@vee-validate/zod`. +- Project Zod schemas use the recommended Zod 4 APIs: + - `z.strictObject(...)` + - `z.looseObject(...)` + - plain `z.object(...)` for strip behavior + - `z.flattenError(...)` + - `z.enum(EnumName)` + - two-argument `z.record(...)` + - top-level string format helpers such as `z.url()` +- MCP/tool JSON Schema conversion uses the native Zod 4 `z.toJSONSchema(...)` + API through a project helper that returns the object schema shape required by + existing tool consumers. +- Provider-facing tool schemas keep root JSON Schema composition out of the + published root object shape; Zod intersection/root `allOf` schemas are + rejected fail-fast until their semantics can be represented safely. +- AI SDK tool schema normalization preserves shared root object fields when it + flattens externally supplied root composition schemas. +- Route and event contract public wire shapes remain unchanged. +- Migration-focused tests cover strict, loose, and strip behavior, the JSON + Schema helper, default and optional tool parameters, and recursive JSON record + parsing. +- `format`, `i18n`, `lint`, `typecheck`, and relevant tests pass. + +## Constraints + +- Do not introduce `zod/mini`. +- Do not refactor business logic. +- Do not change IPC route or event names, payload fields, persisted schemas, or + MCP tool names. +- Keep the existing `import { z } from 'zod'` import style. + +## Non-goals + +- Do not migrate to a new form validation approach. +- Do not redesign MCP/tool business parameters. +- Do not refactor the typed route/event contract abstraction. +- Do not track Zod canary releases. + +## Open Questions + +None. 
diff --git a/docs/architecture/zod-v4-upgrade/tasks.md b/docs/architecture/zod-v4-upgrade/tasks.md new file mode 100644 index 000000000..9420a7910 --- /dev/null +++ b/docs/architecture/zod-v4-upgrade/tasks.md @@ -0,0 +1,17 @@ +# Zod 4 Native Migration Tasks + +- [x] Upgrade dependency declarations and lockfile. +- [x] Add native Zod JSON Schema helper. +- [x] Replace `zod-to-json-schema` imports and calls. +- [x] Migrate legacy Zod schema APIs to Zod 4 recommended APIs. +- [x] Add migration-focused tests. +- [x] Harden provider-facing JSON Schema root output and AI SDK normalization. +- [x] Address CodeRabbit schema normalization comments for root `allOf` + rejection and shared root field preservation. +- [x] Run focused migration, route, tool presenter, and MCP presenter tests. +- [x] Run formatting, i18n, lint, and typecheck. +- [ ] Full `pnpm test` has been run but is not green because of existing renderer + test failures outside this migration: + - `test/renderer/pages/NewThreadPage.test.ts` + - `test/renderer/components/ChatTabView.test.ts` + - `test/renderer/components/MemoryConfigPanel.test.ts` diff --git a/docs/archives/feishu-streaming-card-review-fixes/plan.md b/docs/archives/feishu-streaming-card-review-fixes/plan.md new file mode 100644 index 000000000..a247e3492 --- /dev/null +++ b/docs/archives/feishu-streaming-card-review-fixes/plan.md @@ -0,0 +1,26 @@ +# Plan + +## Review findings + +CodeRabbit comments are valid. Three runtime/client issues are functional or stability risks and should be fixed before merge. Locale comments are lower risk but cheap and should also be fixed to keep UI consistent. + +## Implementation approach + +1. Update Feishu client CardKit send behavior: + - Return `Promise<string>` from `sendCardEntity`. + - Validate reply/create `message_id` immediately. + - Throw a clear error if the id is absent or blank. + +2.
Update Feishu runtime streaming cleanup: + - Add a local close helper in `deliverConversationWithStreamingCard` for cancellation and loop-exit paths. + - Build card state immediately after `createStreamingCard`. + - If `sendCardEntity` fails, close the created card on a best-effort basis and rethrow so the existing fallback continues. + +3. Update reviewed locale strings: + - Localize new Feishu `/pair` and CardKit streaming-card copy in the locale files called out by the review comments. + - Preserve product names and permission strings. + +## Test strategy + +- Run Feishu client/runtime unit tests for the changed behavior. +- Run `pnpm run format`, `pnpm run i18n`, `pnpm run lint`, and `pnpm run typecheck` before commit. diff --git a/docs/archives/feishu-streaming-card-review-fixes/spec.md b/docs/archives/feishu-streaming-card-review-fixes/spec.md new file mode 100644 index 000000000..8446ca563 --- /dev/null +++ b/docs/archives/feishu-streaming-card-review-fixes/spec.md @@ -0,0 +1,34 @@ +# Feishu streaming card review fixes + +## User need + +Review PR #1823 comments, fix valid issues, and push the fixes to the existing PR branch. + +## Goal + +Address valid review feedback for Feishu streaming cards with minimal, focused changes. + +## Acceptance criteria + +- `sendCardEntity` fails fast when Feishu does not return a non-empty `message_id`. +- Any created CardKit streaming card is best-effort closed when a run is cancelled, exits mid-stream, or fails after card creation. +- Reviewed Feishu settings strings are localized instead of leaving new English copy in localized bundles. +- Relevant tests and project validation commands pass. +- Fixes are committed and pushed to `feat/feishu-streaming-cards`. + +## Constraints + +- Preserve the existing markdown fallback behavior. +- Do not weaken existing error handling or authentication. +- Keep changes limited to PR review feedback. +- Do not stage unrelated generated files. + +## Non-goals + +- Real Feishu/Lark app runtime validation.
+- Broader CardKit design changes. +- Reworking unrelated i18n content. + +## Open questions + +None. diff --git a/docs/archives/feishu-streaming-card-review-fixes/tasks.md b/docs/archives/feishu-streaming-card-review-fixes/tasks.md new file mode 100644 index 000000000..3a0fedf2a --- /dev/null +++ b/docs/archives/feishu-streaming-card-review-fixes/tasks.md @@ -0,0 +1,9 @@ +# Tasks + +- [x] Inspect PR #1823 review comments. +- [x] Fix `sendCardEntity` missing `message_id` handling. +- [x] Close streaming cards on cancellation, loop exit, and post-create send failure. +- [x] Localize reviewed Feishu settings strings. +- [x] Add or update focused tests. +- [x] Run validation commands. +- [x] Commit and push fixes to PR branch. diff --git a/docs/archives/feishu-streaming-cards/plan.md b/docs/archives/feishu-streaming-cards/plan.md new file mode 100644 index 000000000..0e49ea179 --- /dev/null +++ b/docs/archives/feishu-streaming-cards/plan.md @@ -0,0 +1,77 @@ +# Feishu/Lark Streaming Cards Plan + +## Existing flow + +- `RemoteSettings.vue` edits `FeishuRemoteSettings`, then `RemoteControlPresenter.saveFeishuSettings()` persists normalized settings. +- `FeishuAdapter` creates `FeishuRuntime` with a `FeishuClient` and `FeishuCommandRouter`. +- `FeishuRuntime.deliverConversation()` polls `RemoteConversationExecution.getSnapshot()` and currently calls `syncDeliverySegments()` to send/update Feishu post messages. + +## Data model and interfaces + +1. Add `enableStreamingCards: boolean` to: + - `FeishuRemoteSettings` shared type. + - `FeishuRemoteRuntimeConfig` main runtime config. + - Zod config normalization/defaults. +2. Include the flag in Feishu adapter signatures and pass it from `FeishuAdapter` into `FeishuRuntime` as `enableStreamingCards`. +3. Do not change IPC route names: channel settings use `z.custom()` and continue carrying the typed settings object. 
+ +## CardKit client additions + +Add low-level methods to `FeishuClient`: + +- `createStreamingCard(initialContent?: string): Promise<{ cardId: string; elementId: string }>` + - Calls `POST /open-apis/cardkit/v1/cards` via `sdk.request`. + - Sends `{ type: 'card_json', data: JSON.stringify(cardJson) }`. + - Card JSON uses schema `2.0`, `config.streaming_mode = true`, `config.update_multi = true`, `streaming_config`, and a markdown element with `element_id: 'md_stream'`. +- `sendCardEntity(target, cardId): Promise` + - Sends `msg_type: 'interactive'` with content `{"type":"card","data":{"card_id":"..."}}`. + - Uses reply or create path consistently with existing message send methods. +- `updateStreamingCardContent(cardId, elementId, content, sequence): Promise` + - Calls `PUT /open-apis/cardkit/v1/cards/:card_id/elements/:element_id/content` with full text. +- `closeStreamingCard(cardId, sequence): Promise` + - Calls `PATCH /open-apis/cardkit/v1/cards/:card_id/settings` with `settings` containing `config.streaming_mode: false`. + +All CardKit helpers throw clear `Feishu CardKit ...` errors when API responses are non-zero or missing required IDs. + +## Runtime delivery + +When `enableStreamingCards` is false, keep existing `deliverConversation()` behavior. + +When true: + +1. Poll snapshots as before. +2. Build the full card text by joining delivery segments in order: + - Live `statusText` is grouped under `**Status**` while the answer is active, so thinking/running state appears inside the streaming card. + - Process segments are grouped under `**Process**`. + - Answer/terminal segments are grouped under `**Answer**`. + - Use `optimizeMarkdownForFeishu()` on the final full text, preserving fenced code blocks and table content for CardKit markdown rendering. +3. On first non-empty text, create a streaming card, send it, then update the markdown element with sequence `1`. +4. 
On later text changes, update the same markdown element with the new full text and the next sequence. +5. On completion or timeout, ensure final text is sent, then close streaming mode with the next sequence and clear remote delivery state. +6. If any streaming card operation fails, log a warning and fall back to the existing `syncDeliverySegments()` path for that conversation. + +The runtime keeps streaming card state only in-memory within the active delivery call. This matches the current queue-based remote delivery lifecycle and avoids changing `RemoteBindingStore`'s generic message-id delivery state. + +## UI and i18n + +- Add a switch in the Feishu remote-control section after access rules and before default agent/workdir. +- Use i18n keys: + - `settings.remote.feishu.streamingCards` + - `settings.remote.feishu.streamingCardsDescription` +- Include the flag in `defaultFeishuSettings()`, field sync, and draft building. + +## Tests + +- `feishuClient.test.ts`: CardKit create/send/update/close serialization and error handling. +- `feishuRuntime.test.ts`: streaming-card mode creates/sends/updates/closes; disabled mode still uses markdown; CardKit failure falls back to markdown; status/process/answer text and Markdown code/table content are preserved in streaming updates. +- `RemoteSettings.test.ts`: switch renders from settings and persists changed flag. +- Existing tests may need fixture updates to include `enableStreamingCards`. + +## Validation + +Run targeted tests first, then required project checks: + +1. `pnpm test -- --run test/main/presenter/remoteControlPresenter/feishuClient.test.ts test/main/presenter/remoteControlPresenter/feishuRuntime.test.ts test/renderer/components/RemoteSettings.test.ts` +2. `pnpm run format` +3. `pnpm run i18n` +4. 
`pnpm run lint` diff --git a/docs/archives/feishu-streaming-cards/spec.md b/docs/archives/feishu-streaming-cards/spec.md new file mode 100644 index 000000000..b07d1bdd8 --- /dev/null +++ b/docs/archives/feishu-streaming-cards/spec.md @@ -0,0 +1,44 @@ +# Feishu/Lark Streaming Cards + +## User need + +Feishu/Lark remote-control users want AI responses to appear progressively with a token-by-token/typewriter effect instead of waiting for a normal message update or a complete answer. + +## Goal + +Add an opt-in Feishu/Lark remote-control setting that delivers AI conversation responses through CardKit streaming cards. When enabled, DeepChat creates a CardKit card entity with `streaming_mode` enabled, sends it to the target chat, updates its markdown component with full text snapshots using a strictly increasing sequence number, and closes streaming mode when the response finishes. + +## Acceptance criteria + +1. The Feishu/Lark remote settings page exposes a "Streaming Cards" switch with helper text explaining required CardKit permissions. +2. The new setting persists with the remote-control Feishu configuration and defaults to off for existing users. +3. When disabled, Feishu remote-control delivery keeps the existing standard markdown/post message behavior. +4. When enabled for a conversation response: + - DeepChat creates a CardKit JSON 2.0 card entity with `config.streaming_mode: true` and `config.update_multi: true`. + - DeepChat sends that card entity as an interactive message using the returned `card_id`. + - DeepChat updates the card markdown element with the full rendered response text on each snapshot and uses strictly increasing `sequence` values. + - DeepChat preserves Markdown content for Feishu CardKit rendering, including fenced code blocks and pipe tables. + - DeepChat shows tool-call progress in the streaming card process section. + - DeepChat shows the current thinking/running status in the streaming card while the response is active. 
+ - DeepChat closes streaming mode with the CardKit settings API when the response completes or times out. +5. If CardKit creation, send, update, or close fails because of permissions or API errors, the runtime falls back to the current markdown/post delivery path and logs a clear warning without exposing secrets. +6. Tests cover client request serialization, settings persistence/UI, streaming-card delivery updates, closing behavior, and fallback to standard markdown delivery. + +## Constraints + +- Use existing remote-control Presenter/Adapter boundaries and typed shared settings. +- Use the existing Lark SDK client request escape hatch for CardKit endpoints because the installed SDK does not expose typed CardKit helpers. +- Do not log App Secret, tokens, or other credentials. +- Keep the implementation small: one streaming card per assistant response, one markdown element (`md_stream`) updated with full text. +- Respect Feishu CardKit limits known from docs: JSON 2.0 cards, `update_multi: true`, card entity send-once, and increasing sequence values. + +## Non-goals + +- No custom streaming-card template editor. +- No interactive buttons inside the streaming response card. +- No streaming cards for generated image delivery; images continue through the existing image path after text completion. +- No change to the official Feishu MCP plugin settings page. + +## Open questions + +None. diff --git a/docs/archives/feishu-streaming-cards/tasks.md b/docs/archives/feishu-streaming-cards/tasks.md new file mode 100644 index 000000000..fc7c3e6b4 --- /dev/null +++ b/docs/archives/feishu-streaming-cards/tasks.md @@ -0,0 +1,12 @@ +# Feishu/Lark Streaming Cards Tasks + +- [x] Inspect issue #1814 and existing Feishu remote-control flow. +- [x] Create SDD spec, plan, and tasks. +- [x] Extend Feishu remote settings/config types and normalization with `enableStreamingCards`. +- [x] Add Feishu settings UI switch and i18n strings. 
+- [x] Add CardKit streaming card helpers to `FeishuClient`. +- [x] Implement streaming-card delivery path in `FeishuRuntime` with markdown fallback. +- [x] Show live thinking/running status inside the streaming card and preserve Markdown code/table content in streaming updates. +- [x] Update/add client, runtime, presenter, and renderer tests. +- [x] Run targeted tests. +- [x] Run `pnpm run format`, `pnpm run i18n`, and `pnpm run lint`. diff --git a/docs/features/cua-plugin-icon/spec.md b/docs/features/cua-plugin-icon/spec.md new file mode 100644 index 000000000..a69604703 --- /dev/null +++ b/docs/features/cua-plugin-icon/spec.md @@ -0,0 +1,49 @@ +# CUA Plugin Icon + +## User Need + +The CUA Computer Use official plugin should be easier to recognize in the plugin hub and detail page. + +## Goal + +Use `lucide:laptop-minimal-check` for `com.deepchat.plugins.cua` instead of the generic puzzle icon. + +## Acceptance Criteria + +- The added plugins row shows the CUA plugin with `lucide:laptop-minimal-check`. +- The plugin catalog card shows the CUA plugin with `lucide:laptop-minimal-check`. +- The CUA plugin detail header shows `lucide:laptop-minimal-check`. +- Other non-special official plugins keep the generic puzzle icon. + +## UI Sketch + +Before: + +```text ++---------------------------+ +| [puzzle] CUA Computer Use | ++---------------------------+ +``` + +After: + +```text ++-----------------------------------------+ +| [laptop-minimal-check] CUA Computer Use | ++-----------------------------------------+ +``` + +## Constraints + +- Keep the change renderer-only. +- Do not add a manifest icon field for a single plugin. +- Do not change plugin runtime behavior. + +## Non-Goals + +- Redesign plugin cards. +- Add configurable icon infrastructure. + +## Open Questions + +- None. 
diff --git a/docs/features/deepchat-skills-management/plan.md b/docs/features/deepchat-skills-management/plan.md new file mode 100644 index 000000000..98f7ec3f4 --- /dev/null +++ b/docs/features/deepchat-skills-management/plan.md @@ -0,0 +1,643 @@ +# DeepChat Skills Management Implementation Plan + +## Architecture Fit + +Use the existing split: + +- Main runtime owner: `src/main/presenter/skillPresenter/index.ts` +- External scan/conversion owner: `src/main/presenter/skillSyncPresenter/index.ts` +- Shared types: `src/shared/types/*` +- Route contracts: `src/shared/contracts/routes/*` +- Route dispatch: `src/main/routes/index.ts` +- Renderer API clients: `src/renderer/api/*Client.ts` +- Settings UI: `src/renderer/settings/components/skills/*` + +Do not create a new top-level Presenter for V1. Add small helper modules under the existing +presenter folders where code size requires it. + +## Current Gaps + +| Gap | Current state | Needed change | +| --- | --- | --- | +| Database state | Runtime extension settings currently live in per-skill files under `.deepchat-meta/.json`. | Move skill management state into the application database and treat `.deepchat-meta` as legacy migration input. | +| Library disabled state | `getMetadataList()` and `getMetadataPrompt()` expose all visible skills. | Add a Library catalog that includes disabled skills, and filter disabled skills from runtime paths. | +| Agent ownership | `SkillSyncPresenter` scans external tools but does not classify links or ownership. | Add user-level folder-format agent management scan/classification. | +| Adoption | Existing import copies external skills into DeepChat, but does not move agent-owned folders or create links. | Add adopt preview/execute with private backups and link creation. | +| Link repair/remove | No DeepChat-owned link model. | Track created links in database state and only repair/remove those safely. | +| Git install | `installFromUrl` downloads ZIP only. 
| Add Git clone scan/install flow with provenance, opened from the top add menu. | +| Sync directory | Existing import/export targets registered tools, not a user-selected multi-skill repo directory. | Add native sync directory preview/execute APIs, labeled as sync directory instead of agent export. | +| Skill details | Long descriptions currently expand list/table rows. | Add one reusable detail dialog that renders manifest data and `SKILL.md` Markdown. | +| Settings UX | The first implementation over-split Library, Agents, Import / Export, Install, and Discover. | Collapse to Library, Agents, and Sync Directory. Folder/ZIP/URL/Git install lives under top Add Skill; install-to-agent lives on each Library row. | + +## Data Model + +Add `src/shared/types/skillManagement.ts`. + +```ts +export type SkillSourceType = + | 'builtin' + | 'created' + | 'folder-install' + | 'zip-install' + | 'url-install' + | 'git-install' + | 'adopted' + | 'imported' + +export type SkillRepoFormat = 'single-skill' | 'multi-skill' + +export interface SkillManagementState { + version: 1 + skills: Record<string, SkillManagementItem> + sync?: SkillSyncDirectoryConfig +} + +export interface SkillManagementItem { + name: string + canonicalPath: string + deepchat: { + disabled: boolean + } + extension: SkillExtensionConfig + source: SkillSource + agentLinks?: Record<string, AgentLinkInfo> +} + +export interface SkillSource { + type: SkillSourceType + repoUrl?: string + repoFormat?: SkillRepoFormat + agentId?: string + originalPath?: string + importedFrom?: string + installedAt?: string + importedAt?: string + adoptedAt?: string +} + +export interface AgentLinkInfo { + path: string + state: 'linked' | 'missing' | 'broken' | 'conflict' | 'permission-denied' + createdByDeepChat: boolean + linkedAt?: string +} + +export interface SkillSyncDirectoryConfig { + skillsDirectory: string + layout: 'multi-skill-repo' + lastExportAt?: string | null + lastImportAt?: string | null +} +``` + +Database state rules: + +- Store only durable state that cannot be
derived cheaply from files. +- Store V1 state in the existing application database, preferably through the DB-backed settings + path (`app_settings`) unless implementation proves dedicated SQL tables are needed. +- Rebuild missing database entries from discovered DeepChat skills with `source.type = 'created'` + only as a fallback. Keep current built-in install behavior, but mark bundled resources as + `builtin` when source can be recognized. +- Migrate legacy runtime extension sidecars from `/.deepchat-meta/.json` into + database state on first load. +- After successful migration, remove the migrated legacy sidecar files. If migration fails, leave + legacy files untouched for retry. +- New writes go only to the database. +- The skills path must not be the canonical storage location for management metadata. +- Use database transactions for multi-skill state writes. + +## Presenter Changes + +### SkillPresenter + +Add helpers: + +- `managementState.ts`: load/save/migrate database-backed skill management state. +- `gitInstall.ts`: clone/scan/install Git repositories. +- `importExport.ts`: native sync directory import/export. + +Add or extend methods on `ISkillPresenter`: + +- `getUnifiedSkillCatalog(): Promise` +- `getSkillDetail(input: { name: string }): Promise` +- `setSkillDeepChatDisabled(name: string, disabled: boolean): Promise` +- `getSkillManagementState(): Promise` +- `scanGitSkillRepo(input): Promise` +- `installSkillsFromGit(input): Promise` +- `getSkillsSyncConfig(): Promise` +- `setSkillsSyncDirectory(input): Promise` +- `previewSyncDirectoryExport(input): Promise` +- `executeSyncDirectoryExport(input): Promise` +- `previewSyncDirectoryImport(input): Promise` +- `executeSyncDirectoryImport(input): Promise` + +Runtime filtering: + +- `getMetadataPrompt()` excludes disabled skills. +- `loadSkillContent(name)` returns `null` for disabled skills unless an explicit internal option is + added later. +- `validateSkillNames()` excludes disabled skills. 
+- `getActiveSkillsAllowedTools()` inherits disabled filtering. +- `getUnifiedSkillCatalog()` includes disabled skills for Library. + +Install provenance: + +- `installFromFolder`, `installFromZip`, and `installFromUrl` should update database source type. +- Existing folder/ZIP/URL behavior must remain compatible. +- Existing overwrite backup under the skills directory should be removed from the target design. + Normal install replacement and adoption backups both use private backup/temp locations outside + the skills path. + +### SkillSyncPresenter + +Keep read-only agent scan/classification inside `SkillSyncPresenter` for the first pass. Extract an +`agentManagement.ts` helper only when adoption, repair, remove, and custom path actions make the +method set large enough to justify another module. + +Methods to add to `ISkillSyncPresenter`: + +- `scanSkillAgents(): Promise` +- `scanSkillAgent(input: { agentId: string }): Promise` +- `getAgentSkillDetail(input: { agentId: string; name: string }): Promise` +- `previewAdoptAgentSkill(input): Promise` +- `executeAdoptAgentSkill(input): Promise` +- `previewLinkDeepChatSkills(input): Promise` +- `executeLinkDeepChatSkills(input): Promise` +- `repairAgentSkillLink(input): Promise` +- `removeAgentSkillLink(input): Promise` +- `addCustomSkillAgentPath(input): Promise` + +Use `toolScanner.getAllTools()` as the registered tool source, but filter link/adopt targets to +user-level folder-format tools: + +```ts +const canManageLinks = + !tool.isProjectLevel && + tool.filePattern === '*/SKILL.md' && + tool.capabilities.supportsSubfolders +``` + +Classification should inspect each entry without writing: + +```txt +symlink -> target missing => broken-link +symlink -> target under skillsDir => deepchat linked +symlink -> other target => external-link +real dir + DeepChat same name + diff => conflict +real dir + no DeepChat same name => agent-owned +``` + +Use content hashes only for conflict detection after verifying both sides have 
`SKILL.md`. + +## Route And Client Changes + +Extend route contracts: + +- `src/shared/contracts/routes/skills.routes.ts` +- `src/shared/contracts/routes/skillSync.routes.ts` + +Extend Zod schemas in `src/shared/contracts/domainSchemas.ts` only for route payload validation. +Route dispatch remains in `src/main/routes/index.ts`. + +Extend renderer clients: + +- `src/renderer/api/SkillClient.ts` for Library, Git, and sync directory calls. +- `src/renderer/api/SkillSyncClient.ts` for agent management calls. + +Add event contracts only where UI needs push refresh: + +- `skills.catalog.changed`: add reason values for `disabled-updated`, `management-state-updated`, + `git-installed`, and `sync-directory-updated`. +- Add `skillSync.agentLinks.changed` if link/adopt actions need passive refresh. + +Keep scan/import/export progress events unchanged. + +### Route API Shape + +Library: + +```ts +export interface UnifiedSkillItem { + name: string + description: string + canonicalPath: string + sourceType: SkillSourceType + deepchatDisabled: boolean + agentLinks: Record + ownerPluginId?: string + mutable: boolean +} + +export interface SkillDetail { + name: string + description: string + sourcePath: string + markdown: string + mutable: boolean +} +``` + +Agents: + +```ts +export type AgentSkillOwner = 'deepchat' | 'agent' | 'external-link' | 'broken-link' | 'unknown' + +export type AgentSkillStatus = + | 'linked' + | 'agent-owned' + | 'linked-out' + | 'broken-link' + | 'conflict' + | 'empty' + +export type AgentSkillAction = + | 'adopt' + | 'resolve-conflict' + | 'repair-link' + | 'remove-link' + | 'open' + +export interface InstalledSkillAgent { + id: string + name: string + skillsDir: string + isCustom: boolean + supportsLinkManagement: boolean + skillsCount: number + linkedCount: number + agentOwnedCount: number + conflictCount: number + brokenLinkCount: number + status: 'ready' | 'detected-no-skills-dir' | 'permission-denied' +} + +export interface AgentSkillItem { + name: 
string + description?: string + path: string + owner: AgentSkillOwner + status: AgentSkillStatus + action?: AgentSkillAction + link?: { + isSymlink: boolean + targetPath?: string + targetExists?: boolean + targetInsideDeepChat?: boolean + createdByDeepChat?: boolean + } + deepchat?: { + exists: boolean + path?: string + disabled?: boolean + sameContent?: boolean + } +} +``` + +Git install: + +```ts +export interface GitSkillRepoScanResult { + repoUrl: string + repoFormat: 'single-skill' | 'multi-skill' + skills: Array<{ + name: string + description: string + relativePath: string + conflict: boolean + valid: boolean + error?: string + }> +} +``` + +Sync directory: + +```ts +export type SyncDirectorySkillState = 'new' | 'same' | 'modified' | 'conflict' | 'invalid' + +export interface SyncDirectorySkillPreview { + name: string + state: SyncDirectorySkillState + sourcePath: string + targetPath: string + error?: string +} +``` + +## File Operations + +Base directories: + +```txt +// +application database: skill management state +~/.deepchat/backups/skill-adoptions//// +~/.deepchat/tmp/skill-adoptions// +~/.deepchat/tmp/skill-installs// +~/.deepchat/tmp/skill-imports// +``` + +The configured skills path is a content root only. It must not contain `.deepchat-meta`, metadata +files, backup folders, temp folders, or rollback folders in the target design. + +Adoption flow: + +```txt +1. Resolve tool and skill row from a fresh scan. +2. Validate source is inside the selected agent skills directory. +3. Resolve symlink source when adopting external-link rows. +4. Validate `SKILL.md` and skill name. +5. Choose target name, defaulting to `-` on conflict. +6. Copy source content to private temp. +7. Validate copied `SKILL.md` and hash. +8. Move temp to `/`. +9. Move original agent path to private backup. +10. Create directory symlink; on Windows fallback to junction. +11. Write database source provenance and agentLinks. +12. 
Rediscover DeepChat skills and rescan the selected agent. +``` + +Agent directories must never receive: + +```txt +*.backup +*.old +*.deepchat-backup-* +.deepchat-meta +tmp +``` + +## Git Install + +Implementation: + +- Use `child_process.execFile` or existing process utility with `git` directly. Do not add a Git + dependency. +- Clone into `~/.deepchat/tmp/skill-installs/`. +- Detect: + - root `SKILL.md` => `single-skill` + - `skills/<name>/SKILL.md` => `multi-skill` +- Reuse existing skill validation and copy logic where possible. +- Support strategies: `rename`, `overwrite`, `skip`. +- Record `repoUrl`, `repoFormat`, and `installedAt`. +- Always remove temp clone after install/scan completion. + +## Sync Directory + +This is separate from existing external tool import/export. + +Export: + +```txt +<sync-directory>/ + README.md + skills/ + <name>/ + SKILL.md + assets/ + references/ + scripts/ +``` + +Import: + +- Scan only `<sync-directory>/skills/*/SKILL.md`. +- Validate each skill before preview. +- Show state: `new`, `same`, `modified`, `conflict`, `invalid`. +- Apply `rename`, `overwrite`, or `skip`. +- Record `source.type = 'imported'`, `importedFrom`, and `importedAt`. + +## Renderer Plan + +Convert `SkillsSettings.vue` into three tabs and one top add menu: + +```txt +SettingsPageShell + Actions: search where relevant, Add Skill menu + Tabs + Library + Agents + Sync Directory +``` + +Reuse or adapt: + +- Existing `SkillCard` for Library rows. +- Existing `SkillInstallDialog` folder/ZIP/URL UI from the top Add Skill menu. +- Existing Git install dialog logic from the top Add Skill menu. +- Existing link/sync-to-agent backend from a single-skill Library row action. + +New components: + +- `SkillAgentsTab.vue` +- `AgentSkillTable.vue` +- `AdoptSkillDialog.vue` +- `ResolveSkillConflictDialog.vue` +- `InstallSkillToAgentDialog.vue` +- `SkillDetailDialog.vue` +- `SkillImportExportTab.vue` +- `InstallFromGitDialog.vue` + +Keep user-facing strings in `src/renderer/src/i18n/*/settings.json`.
+ +### Renderer Style Contract + +Use current settings UI patterns instead of a new design system: + +- Shell: `SettingsPageShell`. +- Tabs: existing shadcn tabs. +- Tables/lists: plain bordered row groups with compact spacing. +- Actions: `Button` with lucide/Iconify icons; destructive actions stay in menus or confirm dialogs. +- Toggles: `Switch` for DeepChat-only enabled/disabled. +- Selection: `Checkbox` for skill multi-select. +- Conflict strategies: `RadioGroup`. +- Paths: monospace text, truncated with tooltip. +- Status: badge text plus semantic color. + +Recommended tab component shape: + +```txt +SkillsSettings.vue + Add Skill menu + SkillInstallDialog.vue + InstallFromGitDialog.vue + SkillCard.vue + SkillDetailDialog.vue + InstallSkillToAgentDialog.vue + SkillAgentsTab.vue + AgentSkillTable.vue + SkillDetailDialog.vue + AdoptSkillDialog.vue + ResolveSkillConflictDialog.vue + CustomAgentPathDialog.vue + SkillImportExportTab.vue as Sync Directory +``` + +Description handling: + +```txt +List/table row: one-line clamp or no description. +Detail dialog: full manifest description plus rendered Markdown from SKILL.md. +``` + +Library row interaction: + +```txt +SkillCard.vue + non-control area click -> SkillDetailDialog.vue + exposed controls: + [Install to Agent] InstallSkillToAgentDialog.vue + [switch] DeepChat enable/disable + +SkillDetailDialog.vue + preview mode: rendered SKILL.md body + edit mode: name (read-only), description, allowedTools, Markdown content + actions: Install to Agent, enable/disable, Edit/Preview, Delete with confirm, Save/Cancel +``` + +Loading, empty, and error states: + +```txt +Loading: +[spinner] Scanning installed agents... + +Empty: +No supported agents found. +[Refresh] + +Permission error: +Cannot read ~/.claude/skills +[Open Folder] [Refresh] + +Broken link: +Target missing: ~/.deepchat/skills/foo +[Repair] [...] +``` + +Do not add nested cards. 
A tab may have one top toolbar and one primary list/table area; dialogs are +the only framed surfaces that may contain form sections. + +### File Change Range + +Expected source files across the full feature. Phase 2 keeps read-only agent classification in +`SkillSyncPresenter.index.ts`; do not add a dedicated agent-management module until write actions +make that separation useful. + +```txt +src/main/presenter/skillPresenter/ + index.ts + managementState.ts + gitInstall.ts + importExport.ts + +src/main/presenter/skillSyncPresenter/ + index.ts + toolScanner.ts + +src/shared/types/ + skill.ts + skillManagement.ts + skillSync.ts + +src/main/presenter/sqlitePresenter/tables/ + configTables.ts + +src/shared/contracts/routes/ + skills.routes.ts + skillSync.routes.ts + +src/shared/contracts/events/ + skills.events.ts + skillSync.events.ts + +src/renderer/api/ + SkillClient.ts + SkillSyncClient.ts + +src/renderer/settings/components/skills/ + SkillsSettings.vue + SkillAgentsTab.vue + AgentSkillTable.vue + AdoptSkillDialog.vue + ResolveSkillConflictDialog.vue + InstallSkillToAgentDialog.vue + SkillDetailDialog.vue + CustomAgentPathDialog.vue + SkillImportExportTab.vue + InstallFromGitDialog.vue +``` + +## Security And Compatibility + +- Reuse `skillSyncPresenter/security.ts` path safety helpers where possible. +- Add symlink-aware containment checks for existing and not-yet-existing paths. +- Never follow recursive symlink loops during scan or copy. +- Skip symlinks during copied skill content unless adopting an external-link target explicitly. +- Enforce current skill name rules: `^[a-z0-9][a-z0-9._-]*$`. +- Enforce current file/ZIP/folder size ceilings or stricter ceilings for new flows. +- Keep `skillsPath` compatibility. All "DeepChat skills path" operations use + `configPresenter.getSkillsPath()`, not hard-coded `~/.deepchat/skills`. +- Migrate legacy `/.deepchat-meta/*.json` sidecars into database state, then stop writing + new sidecar files. 
+- Scans must ignore legacy `.deepchat-meta` until migration cleanup is implemented. +- Plugin-contributed skills are read-only catalog entries and are excluded from mutable actions. +- Detail routes must read only from the selected DeepChat skill path or the freshly scanned supported + agent skill path. + +## Test Strategy + +Main unit tests: + +- Database-backed management state load/save/migration. +- Legacy `.deepchat-meta/<name>.json` sidecar import into database state. +- Assertion that new runtime extension writes do not create files under the skills path. +- Disabled filtering in metadata prompt, `loadSkillContent`, `validateSkillNames`, and Library + catalog inclusion. +- Agent scan classification for linked, agent-owned, external-link, broken-link, and conflict. +- Adoption success, conflict rename, backup location, and no backup residue in agent directory. +- Link create/repair/remove, including Windows junction fallback mocked path. +- Git single-skill and multi-skill scan/install with temp cleanup. +- Sync directory import/export preview and conflict strategies. +- Skill detail route path safety for DeepChat and agent-owned skills. + +Renderer tests: + +- Skills tabs render as Library, Agents, and Sync Directory only. +- Top Add Skill menu exposes folder, ZIP, URL, and Git install choices. +- Disabled toggle calls typed client and updates UI. +- Library row Install to Agent opens detected local agents and calls the link client for one skill. +- Library Install to Agent shows Disconnect and calls the remove-link client when the selected agent + is already linked. +- Skill card body click opens detail while exposed install/toggle controls do not trigger detail. +- Skill detail dialog renders long `SKILL.md` content without expanding list/table rows. +- Skill detail dialog owns edit/save and delete-with-confirm controls for mutable DeepChat skills. +- Agents table row actions map to the right client methods.
+- Agents tab uses icon-leading agent tab buttons and has no bulk "Sync to Agent" button. +- Git install dialog scan/select/install states from the top add menu. +- Sync Directory preview states. +- Discover tab and `find-skills` resource are absent. + +Smoke tests: + +- Extend existing skills read-only route smoke for new Library routes. +- Extend skill sync smoke for agent scan routes without mutating user files. + +Manual checks: + +- macOS/Linux symlink creation. +- Windows junction fallback. +- Agent directory remains clean after adoption. + +## Delivery Order + +1. Database state and Library disabled state. +2. Agents scan/classification UI with no mutations. +3. Adoption, link, repair, and remove. +4. Git install through the top add menu. +5. Sync directory import/export. +6. UX consolidation: remove Install and Discover tabs, remove `find-skills`, move install-to-agent + to Library rows, and add reusable skill details. + +This order produces a useful first slice after step 3: users can take over existing local +folder-format agent skills without polluting agent directories. diff --git a/docs/features/deepchat-skills-management/spec.md b/docs/features/deepchat-skills-management/spec.md new file mode 100644 index 000000000..debfeccb3 --- /dev/null +++ b/docs/features/deepchat-skills-management/spec.md @@ -0,0 +1,736 @@ +# DeepChat Skills Management + +## Current-State Corrections + +The source draft describes the right product direction, but several parts need to be corrected for +the current codebase before implementation: + +- This is not a greenfield skills system. `SkillPresenter` already owns local skill discovery, + install/uninstall, hot reload, built-in skill installation, legacy sidecar runtime config, and + session activation. +- This is not a generic "all agents are sync targets" feature. V1 link/adopt operations are only + valid for user-level folder-format tools that use `<skills-dir>/<name>/SKILL.md`.
Project-level + and single-file tools remain import/export conversion targets only. +- Git install is not the same as current `installFromUrl`. The existing URL path downloads ZIP + files; Git install needs clone/scan/select/install provenance. +- The settings UX should stay in the existing `settings-skills` route, but V1.1 must remove the + over-split five-tab surface. Library owns add/install-to-agent actions, Agents owns inspection and + adoption, and sync directory remains a separate local repository workflow. +- A command-copy Discover tab is not useful enough to keep. Remove it and do not bundle + `find-skills` as a built-in skill. + +## User Need + +Users need DeepChat to act as the local control center for skills: see local DeepChat skills, disable +them for DeepChat without deleting files, install new skills from the top add menu, install a +specific DeepChat skill to a detected local agent from that skill row, inspect existing +folder-format skills from installed agents, adopt those skills into DeepChat safely, and move skills +in or out of a user-selected sync directory. + +## Goals + +- Keep DeepChat runtime skills canonical under the configured skills path, defaulting to + `~/.deepchat/skills`. +- Store source provenance, DeepChat-only disabled state, runtime extension settings, sync directory + settings, and DeepChat-created agent links in the application database. +- Keep the configured DeepChat skills path as a pure content directory: only skill folders and their + files belong under it. +- Preserve the existing `SkillPresenter` runtime behavior while adding a Library catalog that can + show disabled skills. +- Add an Agents tab that scans detected user-level folder-format tools and classifies each skill as + DeepChat-linked, agent-owned, external-link, broken-link, or conflict. 
+- Adopt agent-owned folder-format skills by copying the skill into DeepChat, backing up the original + under `~/.deepchat/backups`, and replacing the agent path with a link to the DeepChat canonical + skill. +- Link a selected DeepChat skill to a supported local agent from the Library row action by creating a + symlink or Windows junction. +- Add Git repository installation for single-skill repos with root `SKILL.md` and multi-skill repos + with `skills/<name>/SKILL.md` through the top add menu. +- Add manual import/export to a user-selected multi-skill sync directory. +- Add a reusable skill detail dialog that clamps list/table descriptions and renders the selected + `SKILL.md` body as Markdown. +- Remove the top-level Install and Discover tabs; remove the old external-tool import block from the + Library tab. + +## Existing Capabilities To Preserve + +- `SkillPresenter` discovers `SKILL.md` files under the configured skills path. +- `SkillPresenter` installs from folder, ZIP, and ZIP URL. +- `SkillPresenter` installs built-in skills from `resources/skills`. +- `SkillPresenter` currently stores per-skill runtime extension config under `.deepchat-meta`; the + target design migrates this state into the database and treats `.deepchat-meta` as legacy input. +- `SkillPresenter` watches skill file changes and publishes `skills.catalog.changed`. +- `SkillSyncPresenter` scans registered external tools, imports external skills into DeepChat, and + exports DeepChat skills to external tool formats. +- Renderer-main communication uses typed route contracts and renderer API clients. +- Skills settings currently live at `settings-skills` in `SkillsSettings.vue`. + +## Directory Layout + +DeepChat-managed skills use the configured skills path.
When the user has not changed it, the path +is: + +```txt +~/.deepchat/ + skills/ + skill-a/ + SKILL.md + assets/ + references/ + scripts/ + skill-b/ + SKILL.md + backups/ + skill-adoptions/ + claude-code/ + old-review/ + 20260626-153000/ + original/ + SKILL.md + adoption.json + tmp/ + skill-adoptions/ + skill-installs/ + skill-imports/ +``` + +Database-backed management state: + +```txt +application database + skill metadata/provenance + DeepChat-only disabled flags + runtime extension settings + agent link ownership + sync directory config and timestamps +``` + +After adoption or Library install-to-agent, supported agent directories should contain final skills +or links only: + +```txt +~/.claude/skills/ + old-review -> ~/.deepchat/skills/old-review + guizang-ppt -> ~/.deepchat/skills/guizang-ppt +``` + +Manual sync directory layout: + +```txt +~/Documents/deepchat-skills/ + README.md + skills/ + old-review/ + SKILL.md + assets/ + references/ + scripts/ + guizang-ppt/ + SKILL.md +``` + +## Ownership Rules + +| Location | Owner | +| --- | --- | +| Real directory under configured DeepChat skills path | DeepChat | +| Real directory under a supported agent skills path | Agent | +| Agent path is a symlink/junction to DeepChat skills path | DeepChat | +| Agent path is a symlink to another location | External link | +| Agent path is a symlink/junction whose target is missing | Broken link | + +DeepChat runtime reads only managed DeepChat skills and plugin-contributed runtime skills. It must +not require any metadata directory inside the skills path. Legacy `.deepchat-meta`, backups, temp +directories, and agent backup residue must be ignored during migration/scanning. 
+ +## Supported Agent Management Targets + +V1 link/adopt supports only user-level folder-format tools: + +- `claude-code` +- `codex` +- `cursor` +- `opencode` +- `goose` +- `kilocode` +- `copilot-user` + +V1 does not link/adopt project-level or single-file tools: + +- `cursor-project` +- `windsurf` +- `copilot` +- `kiro` +- `antigravity` + +Those tools remain available through the existing import/export conversion flow. + +## Functional Requirements + +### Library + +- Users can view all DeepChat-managed skills, including disabled skills. +- Users can toggle a DeepChat-only disabled state. +- Users can open a reusable skill detail dialog from each row. +- Users can install a single DeepChat skill to a detected local agent from that skill row. +- Users can add skills from folder, ZIP, URL, or Git repository from the top add menu. +- The Library tab must not show the old external-tool import grid. +- Disabled skills remain on disk and remain eligible for agent links and manual export when the user + explicitly includes them. +- Disabled skills are excluded from DeepChat runtime prompt injection, automatic validation, and + active skill tool permissions. + +### Agents + +- Users can see detected supported agents as icon tab buttons matching the Library/external tool + button style. +- Users can select an agent and see the agent's skills directory, counts, and skill rows. +- Agent rows classify ownership and status without mutating files during scan. +- Agent rows clamp descriptions to a short preview and expose full content through the reusable + skill detail dialog. +- Agent-owned folder skills can be adopted into DeepChat after a preview and confirmation. +- DeepChat-linked skills show link details and do not offer a primary mutation button. +- Broken DeepChat-created links can be repaired when the canonical DeepChat skill still exists. +- DeepChat-created links can be removed without deleting the canonical DeepChat skill. 
+- Agents tab must not show a bulk "Sync to Agent" action; installing DeepChat skills to agents is a + Library row action. + +### Add Skill + +- Users can install selected skills from a Git repository. +- Folder, ZIP, URL, and Git installation share the top add menu instead of a separate Install tab. +- Git scan detects root `SKILL.md` as `single-skill`. +- Git scan detects `skills/<name>/SKILL.md` entries as `multi-skill`. +- Git install records source provenance in database state. + +### Sync Directory + +- Users can set a sync directory. +- Export writes selected skills to `<sync-directory>/skills/`. +- Import reads selected skills from `<sync-directory>/skills/`. +- Import/export previews show new, same, modified, conflict, skipped, and failed items. +- Import/export updates database sync timestamps. +- This workflow is for local multi-skill repository backup/migration, not for installing a skill to + an agent. + +## UX Shape + +The existing `settings-skills` route becomes a smaller tabbed work surface: + +```txt ++--------------------------------------------------------------------------+ +| Skills [Search_______] [+ Add Skill] | +| Manage DeepChat skills and local agent links. | ++--------------------------------------------------------------------------+ +| [ Library ] [ Agents ] [ Sync Directory ] | ++--------------------------------------------------------------------------+ +| active tab content | ++--------------------------------------------------------------------------+ +``` + +Style contract: + +- Use the existing settings shell, shadcn controls, Iconify/lucide icons, and Tailwind utilities. +- Keep the page dense and operational. No hero, marketing panel, gradient background, or nested + cards. +- Use compact rows, 8px or smaller radius, semantic badges, and icon buttons with tooltips for + refresh/open/remove actions. +- Agent selector buttons use the same icon-leading button style as the Library external tool tiles: + icon, name, count badge, selected border.
+- Use semantic color only as a secondary signal: + - Enabled/linked/success: green semantic badge. + - Disabled/skipped/neutral: muted badge. + - Conflict/warning: amber badge. + - Broken/failed/destructive: destructive badge. +- Every status must also have text; color alone is not enough. +- Long paths and descriptions truncate or clamp instead of wrapping over action controls. +- Primary action per row goes in the right column; secondary actions go in a row menu. + +Top add menu: + +```txt ++----------------------------------+ +| + Add Skill | ++----------------------------------+ +| Folder... | +| ZIP... | +| URL... | +| Git repository... | ++----------------------------------+ +``` + +Git repository install opens from the top add menu, not from a tab: + +```txt ++--------------------------------------------------------------------------+ +| Install from Git | ++--------------------------------------------------------------------------+ +| Repository URL | +| [https://github.com/op7418/guizang-ppt-skill______________] [Scan] | +| | +| Detected format: single-skill | +| [x] guizang-ppt-skill No conflict | +| | +| Conflict strategy | +| (*) Rename new skill ( ) Replace existing ( ) Skip existing | +| | +| [Cancel] [Install to DeepChat] | ++--------------------------------------------------------------------------+ +``` + +Library tab: + +```txt ++--------------------------------------------------------------------------+ +| Library [Open Folder] | ++--------------------------------------------------------------------------+ +| Summary: 18 skills - 15 enabled - 3 disabled - 4 agent links | +| | +| [wand] guizang-ppt | +| Create PowerPoint decks from structured plans. | +| Git install Enabled Claude [Install to Agent] [on] | +| | +| [wand] frontend-design | +| UI and UX implementation guidance. | +| Built-in Enabled - [Install to Agent] [on] | +| | +| [wand] old-review | +| Review legacy code paths. 
| +| Adopted Disabled Codex [Install to Agent] [off]| +| | +| Empty: No skills installed. Use Add Skill to add folder, ZIP, URL, Git. | ++--------------------------------------------------------------------------+ +``` + +Library row interaction: + +```txt +Click a non-control area of a Library row -> open Skill Detail. +The exposed hot controls are: + +[Install to Agent] Install to Agent +[on/off] Enable or disable in DeepChat +``` + +Skill detail: + +```txt ++--------------------------------------------------------------------------+ +| G guizang-ppt [Install to Agent] | +| Create PowerPoint decks from structured plans. Enabled [] | +| /Users/.../.deepchat/skills/guizang-ppt/SKILL.md [Edit] [Delete] | +| | +| +----------------------------------------------------------------------+ | +| | Rendered Markdown preview of SKILL.md without YAML frontmatter | | +| +----------------------------------------------------------------------+ | ++--------------------------------------------------------------------------+ + +Edit mode keeps the same dialog: + ++--------------------------------------------------------------------------+ +| G guizang-ppt [Install to Agent] | +| /Users/.../.deepchat/skills/guizang-ppt/SKILL.md [Preview] [Delete] | +| | +| Name: guizang-ppt (read-only) | +| Description: [.........................................................] | +| Allowed tools: [Read, Bash] | +| Content: | +| +----------------------------------------------------------------------+ | +| | # guizang-ppt | | +| | ... 
| | +| +----------------------------------------------------------------------+ | +| [Cancel] [Save] | ++--------------------------------------------------------------------------+ +``` + +Install one skill to a detected local agent: + +```txt ++--------------------------------------------------+ +| Install guizang-ppt to Agent | ++--------------------------------------------------+ +| Target agent | +| [ Claude Code ] [ OpenAI Codex ] [ Cursor ] | +| [ OpenCode ] [ Goose ] [ Kilo Code ] | +| | +| Result | +| ~/.codex/skills/guizang-ppt -> DeepChat skill | +| | +| Conflict strategy | +| (*) Rename link ( ) Replace DeepChat-owned link | +| ( ) Skip | +| | +| [Cancel] [Install] | ++--------------------------------------------------+ +``` + +Library row behavior: + +- Enabled/disabled toggle changes only DeepChat runtime state. +- Disabled rows remain visible and editable, but their badge is muted and activation controls are + disabled where runtime selection appears. +- Built-in or plugin-owned rows do not show destructive actions unless the existing system already + supports that action. +- The old external-tool import grid is removed from this tab. 
+ +Agents tab: + +```txt ++--------------------------------------------------------------------------+ +| Agents [Refresh] | ++--------------------------------------------------------------------------+ +| [ icon Claude Code 0 ] [ icon OpenAI Codex 2 ] [ icon Cursor 0 ] | +| [ icon OpenCode 0 ] [ icon Goose 0 ] [ icon Kilo Code 0 ] | ++--------------------------------------------------------------------------+ +| OpenAI Codex Available | +| /Users/me/.codex/skills | +| 2 skills - 0 linked - 2 agent owned - 0 conflict - 0 broken | ++------------------+--------------+--------------+----------+------------+ +| Skill | Owner | Status | Preview | Action | ++------------------+--------------+--------------+----------+------------+ +| hatch-pet | Codex | Agent owned | View | Adopt | +| native-feel | Codex | Agent owned | View | Adopt | ++------------------+--------------+--------------+----------+------------+ +``` + +Agent row rules: + +- Description stays clamped to one line or is omitted from the table. +- Full description and `SKILL.md` body are shown through the reusable detail dialog. +- The tab does not show "Sync to Agent"; linking DeepChat skills to agents starts from Library. + +Agent row states: + +```txt +Agent owned: ++------------------+--------------+--------------+----------+------------+ +| old-review | Claude Code | Agent owned | View | Adopt | ++------------------+--------------+--------------+----------+------------+ + +DeepChat linked: ++------------------+--------------+--------------+----------+------------+ +| guizang-ppt | DeepChat | Linked | View | ... 
| ++------------------+--------------+--------------+----------+------------+ +menu: Open in Finder, Remove link + +External link: ++------------------+--------------+--------------+----------+------------+ +| docs-writer | External | Linked out | View | Adopt | ++------------------+--------------+--------------+----------+------------+ + +Conflict: ++------------------+--------------+--------------+----------+------------+ +| frontend-helper | Claude Code | Conflict | View | Resolve | ++------------------+--------------+--------------+----------+------------+ + +Broken link: ++------------------+--------------+--------------+----------+------------+ +| broken-ppt | DeepChat | Broken link | View | Repair | ++------------------+--------------+--------------+----------+------------+ +``` + +Adopt confirmation: + +```txt ++--------------------------------------------------+ +| Adopt Skill | ++--------------------------------------------------+ +| old-review | +| | +| Current location | +| ~/.claude/skills/old-review | +| | +| After adoption | +| ~/.deepchat/skills/old-review | +| ~/.claude/skills/old-review -> DeepChat skill | +| | +| Backup | +| ~/.deepchat/backups/skill-adoptions/... 
| +| | +| [Cancel] [Adopt] | ++--------------------------------------------------+ +``` + +Conflict resolver: + +```txt ++--------------------------------------------------+ +| Resolve Conflict | ++--------------------------------------------------+ +| frontend-helper | +| | +| Agent | +| ~/.claude/skills/frontend-helper | +| | +| DeepChat | +| ~/.deepchat/skills/frontend-helper | +| | +| Choose action | +| (*) Adopt as frontend-helper-claude | +| ( ) Replace DeepChat frontend-helper | +| ( ) Keep current state | +| | +| [Cancel] [Apply] | ++--------------------------------------------------+ +``` + +Custom path: + +```txt ++--------------------------------------------------+ +| Add Custom Agent Path | ++--------------------------------------------------+ +| Display name | +| [My Agent ] | +| | +| Skills directory | +| [/Users/me/.my-agent/skills ] | +| | +| Format | +| (*) SKILL.md folder format | +| | +| [Cancel] [Scan path] | ++--------------------------------------------------+ +``` + +Reusable skill detail: + +```txt ++----------------------------------------------------------+ +| C | +| ComputerUse skill [Switch] [...]| +| Drive the user's desktop GUI through ... | +| | +| +------------------------------------------------------+ | +| | Computer Use | | +| | Rendered Markdown from SKILL.md | | +| | ... | | +| +------------------------------------------------------+ | +| | +| [Try in Chat] | ++----------------------------------------------------------+ +``` + +The same dialog is used from Library rows and Agents rows. It receives a source descriptor and +renders the manifest summary plus sanitized Markdown body. 
+ +Sync Directory tab: + +```txt ++--------------------------------------------------------------------------+ +| Sync Directory | ++--------------------------------------------------------------------------+ +| Local multi-skill repository | +| [~/Documents/deepchat-skills____________________________] [Browse] [Save] | ++--------------------------------------------------------------------------+ +| [ Export to directory ] [ Import from directory ] | ++--------------------------------------------------------------------------+ +| Export selected skills | +| [x] guizang-ppt Enabled Git install | +| [x] frontend-design Enabled Built-in | +| [ ] old-review Disabled Adopted | +| | +| [Preview Export] [Export Now] | ++--------------------------------------------------------------------------+ +``` + +Import preview: + +```txt ++--------------------------------------------------------------------------+ +| Import from ~/Documents/deepchat-skills | ++----------------------+-------------+---------------+---------------------+ +| Skill | State | Source | Action | ++----------------------+-------------+---------------+---------------------+ +| guizang-ppt | Same | sync dir | Skip | +| frontend-design | New | sync dir | Import | +| skill-x | Conflict | sync dir | Rename | +| broken-skill | Invalid | sync dir | View error | ++----------------------+-------------+---------------+---------------------+ +| Conflict strategy: (*) Rename imported ( ) Replace local ( ) Skip | +| [Cancel] [Import Selected] | ++--------------------------------------------------------------------------+ +``` + +## Non-Goals + +- No automatic scheduled sync. +- No built-in Git commit, pull, or push. +- No marketplace search or command-copy Discover tab. +- No project-level agent link/adopt. +- No conversion of single-file prompt formats into linked folder-format skills during adoption. +- No cloud sync. +- No separate Install tab; install flows start from the top add menu. 
+- No new dependency unless an existing standard library or installed dependency is insufficient. + +## Acceptance Criteria + +- Database state is created or migrated without deleting existing skills or legacy sidecar runtime + configs. +- Disabling a skill persists across restart, remains visible in Library, and excludes that skill + from DeepChat runtime metadata prompt and active-skill validation. +- The configured DeepChat skills path contains only skill content directories. It must not contain + `.deepchat-meta`, metadata files, backups, temp files, or other management metadata after + migration. +- Legacy `.deepchat-meta` runtime config files are migrated into the database and removed only after + the database write succeeds. +- Supported agents are shown only when detected locally. +- Supported agents use icon-leading tab buttons with counts. +- Project-level and single-file tools are not offered link/adopt actions. +- Scanning an agent never creates, deletes, or moves files. +- Agents table descriptions are clamped or omitted, and full skill content is available through the + reusable skill detail dialog with Markdown rendering. +- Adopting an agent-owned skill creates `~/.deepchat/skills/<skill-name>/SKILL.md`, stores the original + under `~/.deepchat/backups/skill-adoptions/...`, and replaces the agent path with a link to the + canonical DeepChat skill. +- Agent skills directories do not receive backup, temp, rollback, or metadata folders. +- Same-name conflicts default to creating a unique adopted skill name instead of overwriting the + existing DeepChat skill. +- Installing one Library skill to an agent creates or repairs only DeepChat-owned links and does not + delete agent-owned skill directories unless the user explicitly chooses a conflict strategy. +- The top add menu exposes folder, ZIP, URL, and Git install paths. +- Git single-skill and multi-skill repositories install selected skills into DeepChat and write + `git-install` provenance.
+- Manual export creates a valid multi-skill repository layout. +- Manual import handles new, same, modified, and conflict states before writing. +- The old external-tool import grid, separate Install tab, Discover tab, and `find-skills` bundled + skill are removed from the settings surface. + +## Critical Acceptance Scenarios + +### Agent-Owned Adoption + +```txt +Given ~/.claude/skills/old-review/SKILL.md exists +And ~/.deepchat/skills/old-review does not exist +When the user adopts old-review from Claude Code +Then ~/.deepchat/skills/old-review/SKILL.md exists +And ~/.claude/skills/old-review links to ~/.deepchat/skills/old-review +And the original is backed up under ~/.deepchat/backups/skill-adoptions +And database state source.type is adopted +``` + +### Clean Agent Directory + +```txt +Given a user adopts ~/.claude/skills/old-review +Then ~/.claude/skills contains old-review as a link +And ~/.claude/skills does not contain old-review.deepchat-backup-* +And scan shows one old-review row +``` + +### DeepChat Linked Display + +```txt +Given ~/.claude/skills/guizang-ppt links to ~/.deepchat/skills/guizang-ppt +Then the Agents table shows: +Skill = guizang-ppt +Owner = DeepChat +Status = Linked +Action = row menu only +``` + +### Conflict Adoption + +```txt +Given ~/.claude/skills/frontend-helper exists +And ~/.deepchat/skills/frontend-helper exists +And their content hashes differ +When the user chooses "Adopt as frontend-helper-claude" +Then ~/.deepchat/skills/frontend-helper remains unchanged +And ~/.deepchat/skills/frontend-helper-claude is created +And ~/.claude/skills/frontend-helper links to the renamed DeepChat skill +``` + +### Git Installation + +```txt +Given a repo root contains SKILL.md +When the user opens Add Skill -> Git repository, scans, and installs it +Then the selected skill is copied to the DeepChat skills path +And database state source.type is git-install +And database state source.repoFormat is single-skill + +Example: 
`https://github.com/op7418/guizang-ppt-skill` is a root `SKILL.md` repository whose +frontmatter skill name is `guizang-ppt-skill`. + +Given a repo contains skills/a/SKILL.md and skills/b/SKILL.md +When the user selects a and b +Then both skills are installed +And database state source.repoFormat is multi-skill +``` + +### Library Install To Agent + +```txt +Given ~/.deepchat/skills/guizang-ppt/SKILL.md exists +And ~/.codex/skills is detected +When the user chooses Install to Agent from the guizang-ppt Library row +Then ~/.codex/skills/guizang-ppt links to ~/.deepchat/skills/guizang-ppt +And database state records the Codex agent link +And the Agents tab later shows guizang-ppt as DeepChat linked + +Given guizang-ppt is already linked to ~/.codex/skills/guizang-ppt +When the user opens Install to Agent and selects Codex +Then the dialog shows a Disconnect action +And Disconnect removes the DeepChat-owned Agent link +And database state removes the Codex agent link record +``` + +### Library Row And Detail Interaction + +```txt +Given a Library skill row is visible +When the user clicks any non-control area of the row +Then the Skill Detail dialog opens +And the row does not expose a standalone View details action +And the row keeps Install to Agent and DeepChat enable/disable as visible controls + +Given the Skill Detail dialog is open for a mutable skill +When the user chooses Edit +Then the dialog switches to editable name, description, allowed tools, and Markdown content fields +And Delete is next to Edit/Preview inside the same dialog +And Delete requires a second confirmation before removing the skill +And Install to Agent and DeepChat enable/disable are also available inside the detail dialog +``` + +### Skill Detail Preview + +```txt +Given an agent skill has a long description +When the user views the agent row +Then the table does not expand horizontally for the full description +And clicking the row detail affordance opens a detail dialog +And the dialog 
renders the selected SKILL.md body as Markdown +``` + +### Manual Export + +```txt +Given sync directory is ~/Documents/deepchat-skills +And selected skills are a and b +When the user exports +Then ~/Documents/deepchat-skills/skills/a/SKILL.md exists +And ~/Documents/deepchat-skills/skills/b/SKILL.md exists +And database sync lastExportAt is updated +``` + +### DeepChat-Only Disable + +```txt +Given skill a exists in the DeepChat skills path +When the user disables a in Library +Then database state marks skill a as DeepChat-disabled +And Library still shows a +And getMetadataPrompt excludes a +And existing agent links remain unchanged +``` + +## Resolved Assumptions + +- `~/.deepchat/skills` means the configured skills path when the user changed `skillsPath`. +- Skill management database state is local-only and not automatically synchronized. +- Existing `.deepchat-meta/<skill-name>.json` runtime config is legacy migration input; new writes go to + the database. +- Plugin-contributed skills remain read-only runtime contributions and are not adopted, linked, + exported by default, or moved into database-owned management state. diff --git a/docs/features/deepchat-skills-management/tasks.md b/docs/features/deepchat-skills-management/tasks.md new file mode 100644 index 000000000..7a115e310 --- /dev/null +++ b/docs/features/deepchat-skills-management/tasks.md @@ -0,0 +1,189 @@ +# DeepChat Skills Management Tasks + +Status: Phase 1 through Phase 8 are implemented for the supported V1.1 paths. V1.1 keeps the +working backend paths, removes the over-split Install/Discover UI, moves install-to-agent into +Library rows, and keeps sync directory as a separate local repository workflow. Adoption still +defaults conflicts to safe rename; destructive overwrite and custom agent path management remain +deferred until the agent registry has a durable custom-target model. +The standalone draft has been absorbed into this SDD folder and removed.
+ +## Phase 0: Design Contract Check + +- [x] Keep the `settings-skills` route as one tabbed settings surface. +- [x] Match the ASCII layouts in `spec.md` before implementing UI. +- [x] Use existing shadcn settings controls and lucide/Iconify icons. +- [x] Use compact row/table layouts; do not add hero panels or nested cards. +- [x] Add loading, empty, permission-error, conflict, broken-link, and invalid-skill states. +- [x] Ensure every status uses text, not color alone. +- [x] Truncate long paths and descriptions with tooltips. +- [x] Keep all user-facing labels in i18n files. + +## Phase 1: Database State And Library Disable + +- [x] Add `src/shared/types/skillManagement.ts`. +- [x] Add database-backed management state helper under `src/main/presenter/skillPresenter/`. +- [x] Store source provenance, disabled state, runtime extension settings, sync config, and agent + links in the application database. +- [x] Migrate legacy `<skills-path>/.deepchat-meta/<skill-name>.json` runtime configs into database state. +- [x] Remove migrated legacy `.deepchat-meta` files after successful database write. +- [x] Stop writing new `.deepchat-meta` files under the skills path. +- [x] Add a test that the configured skills path remains a pure skill content directory. +- [x] Add `getUnifiedSkillCatalog()` to `ISkillPresenter`. +- [x] Add `setSkillDeepChatDisabled()` to `ISkillPresenter`. +- [x] Add typed routes and `SkillClient` methods for unified catalog and disabled toggle. +- [x] Filter disabled skills from `getMetadataPrompt()`. +- [x] Filter disabled skills from `loadSkillContent()`. +- [x] Filter disabled skills from `validateSkillNames()`. +- [x] Keep disabled skills visible in the Library catalog. +- [x] Add Library disabled toggle UI and i18n strings. +- [x] Add unit tests for database migration, persistence, disabled filtering, and restart behavior.
+ +## Phase 2: Agents Scan And Classification + +- [x] Add shared agent-management types for installed agents, skill rows, owners, statuses, and + actions. +- [x] Add route contracts for agent scan/list/detail. +- [x] Keep read-only classification helpers inside `SkillSyncPresenter`; defer a separate + `agentManagement.ts` until adoption/repair/remove logic needs it. +- [x] Reuse `toolScanner.getAllTools()` and filter link/adopt support to user-level + `*/SKILL.md` tools. +- [x] Implement read-only installed-agent detection. +- [x] Implement read-only skill row classification. +- [x] Exclude project-level and single-file tools from link/adopt actions. +- [x] Add `SkillSyncClient` methods for agent scan/list/detail. +- [x] Add `SkillAgentsTab.vue` and `AgentSkillTable.vue`. +- [x] Match the read-only Agents tab ASCII layout, row states, and action placement from `spec.md`; + keep write actions disabled until Phase 3. +- [x] Add tests for linked, agent-owned, external-link, broken-link, and conflict classification. + +## Phase 3: Adoption And Agent Links + +- [x] Add adopt preview route and presenter method. +- [x] Add adopt execute route and presenter method. +- [x] Copy adoption sources through `~/.deepchat/tmp/skill-adoptions/`. +- [x] Move original agent content to `~/.deepchat/backups/skill-adoptions/...`. +- [x] Replace adopted agent path with a symlink or Windows junction. +- [x] Record database source provenance and `agentLinks`. +- [x] Add default conflict strategy: adopt as `<skill-name>-<agent>`. +- [x] Add sync-to-agent preview route and presenter method. +- [x] Add sync-to-agent execute route and presenter method. +- [x] Add repair DeepChat-owned link route and presenter method. +- [x] Add remove DeepChat-owned link route and presenter method. +- [x] Add `AdoptSkillDialog.vue` and wire adopt/resolve-conflict rows to the existing adoption + backend. +- [x] Match the adopt confirmation ASCII layout from `spec.md`.
+- [ ] Add destructive overwrite/keep adoption conflict strategies after the custom agent ownership + model is durable enough to support them safely. +- [x] Add batch sync-to-agent backend; the original dialog was superseded by the Phase 8 Library + row flow. +- [x] Match sync-to-agent dialog ASCII layout from `spec.md` for the first V1 slice. +- [ ] Match custom-path dialog ASCII layout when custom agent targets are implemented. +- [x] Add tests that agent directories never receive backup/temp/meta folders. +- [x] Add renderer tests for the adopt preview/execute confirmation flow. +- [x] Add tests for repair/remove refusing links not created by DeepChat. + +## Phase 4: Git Install + +- [x] Add Git scan route and `SkillClient` method. +- [x] Add Git install route and `SkillClient` method. +- [x] Clone repos to `~/.deepchat/tmp/skill-installs/`. +- [x] Detect root `SKILL.md` as `single-skill`. +- [x] Detect `skills/<skill-name>/SKILL.md` entries as `multi-skill`. +- [x] Reuse existing skill validation before copy. +- [x] Support `rename`, `overwrite`, and `skip` conflict strategies. +- [x] Record `git-install` provenance in database state. +- [x] Clean temp clones on success and failure. +- [x] Add `InstallFromGitDialog.vue` and wire it into the Install tab. +- [x] Match the Install tab Git scan/select/conflict ASCII layout from `spec.md`. +- [x] Add tests for single-skill, multi-skill, conflict strategy, and temp cleanup. + +## Phase 5: Sync Directory Import / Export + +- [x] Add sync directory config types and routes. +- [x] Add `getSkillsSyncConfig()` and `setSkillsSyncDirectory()`. +- [x] Add export preview and execute routes. +- [x] Export selected skills to `<sync-dir>/skills/<skill-name>/`. +- [x] Write `README.md` when missing. +- [x] Exclude disabled skills by default and allow explicit inclusion. +- [x] Add import preview and execute routes. +- [x] Scan `<sync-dir>/skills/*/SKILL.md` only. +- [x] Preview `new`, `same`, `modified`, `conflict`, and `invalid`.
+- [x] Apply `rename`, `overwrite`, and `skip`. +- [x] Record `imported` provenance and import/export timestamps. +- [x] Add `SkillImportExportTab.vue`. +- [x] Match export and import preview ASCII layouts from `spec.md`. +- [x] Add tests for export layout and import conflict states. + +## Phase 6: Discover + +- [x] Add `resources/skills/find-skills/SKILL.md`. +- [x] Confirm built-in installation picks up the new skill on first run. +- [x] Add `SkillDiscoverTab.vue`. +- [x] Match the Discover tab ASCII layout from `spec.md`. +- [x] Show local `find-skills` status and command-oriented actions. +- [x] Add i18n strings. +- [x] Add a renderer test that the Discover tab renders through the five-tab surface coverage. + +## Phase 7: Settings Surface And Smoke Coverage + +- [x] Convert `SkillsSettings.vue` into Library, Agents, Import / Export, Install, and Discover tabs. +- [x] Keep existing folder/ZIP/URL install behavior reachable. +- [x] Keep existing external tool import/export behavior reachable. +- [x] Keep existing first-launch sync prompt behavior or explicitly remove it from the UX if the new + tabs replace it. +- [x] Extend `test/renderer/api/clients.test.ts` for new typed routes. +- [x] Extend skills route smoke tests for new read-only routes. +- [x] Extend skill sync smoke tests for read-only agent scan routes. +- [x] Run `pnpm run format`. +- [x] Run `pnpm run i18n`. +- [x] Run `pnpm run lint`. +- [x] Run targeted main and renderer tests for touched skills modules. + +## Phase 8: V1.1 UX Consolidation + +- [x] Reduce `SkillsSettings.vue` tabs to Library, Agents, and Sync Directory. +- [x] Remove the separate Install tab and `SkillInstallTab.vue`. +- [x] Remove the Discover tab, `SkillDiscoverTab.vue`, `resources/skills/find-skills/`, and related + i18n/tests. +- [x] Keep folder, ZIP, URL, and Git installs reachable from the top Add Skill menu. +- [x] Move Git repo install entry into the Add Skill menu and reuse the existing Git scan/install + backend. 
+- [x] Remove the top Export action; replace that flow with per-skill Library Install to Agent. +- [x] Remove the old external-tool import grid from the Library tab. +- [x] Add Library row action: Install to Agent. +- [x] Let Install to Agent choose from detected local user-level folder-format agents only. +- [x] Reuse existing link/sync-to-agent backend for the one-skill Library row flow. +- [x] Let Install to Agent disconnect an already linked Agent via the existing remove-link backend. +- [x] Remove the bulk Sync to Agent button from the Agents tab. +- [x] Change Agents selector buttons to icon-leading tab buttons with count badges. +- [x] Clamp or omit long descriptions in the Agents skill table. +- [x] Add reusable `SkillDetailDialog.vue` for Library and Agents rows. +- [x] Render the selected `SKILL.md` body as Markdown inside the detail dialog. +- [x] Make Library row non-control area open the detail dialog directly. +- [x] Keep Library row Install to Agent and DeepChat enable/disable as exposed controls. +- [x] Move mutable skill edit/save into `SkillDetailDialog.vue`. +- [x] Remove the standalone `SkillEditorSheet.vue` path after merging edit into detail. +- [x] Move mutable skill delete into `SkillDetailDialog.vue` with second confirmation. +- [x] Keep Install to Agent and DeepChat enable/disable available inside the detail dialog. +- [x] Add detail route/client methods for DeepChat skills and scanned agent skills. +- [x] Rename the manual import/export UI copy to Sync Directory to separate it from agent install. +- [x] Keep sync directory import/export backend intact for local multi-skill repository backup and + migration. +- [x] Update all user-facing strings in every locale with local-language translations. +- [x] Update renderer tests for three tabs, Add Skill Git install, Library Install to Agent, agent + icon tabs, detail dialog, and absence of Discover/Install tabs. +- [x] Update main/contract tests for skill detail routes. 
+- [x] Run `pnpm run format`. +- [x] Run `pnpm run i18n`. +- [x] Run `pnpm run lint`. +- [x] Run targeted main and renderer tests for touched skills modules. + +## Deferred + +- [ ] Built-in Git commit/pull/push for the sync directory. +- [ ] Automatic scheduled sync. +- [ ] Custom agent path registry and UI. +- [ ] Destructive overwrite/keep adoption conflict strategies. +- [ ] Project-level agent adoption. +- [ ] Single-file prompt adoption into folder-format skills. +- [ ] Deep external marketplace search integration. diff --git a/docs/features/plugins-hub/plan.md b/docs/features/plugins-hub/plan.md new file mode 100644 index 000000000..a88fc62d5 --- /dev/null +++ b/docs/features/plugins-hub/plan.md @@ -0,0 +1,454 @@ +# Plugins Hub Implementation Plan + +## Strategy + +Do not build a new plugin platform and do not add a new window. Move the UI ownership boundary into +the existing main window. + +The smallest reliable implementation is: + +- Add `/plugins` routes to the existing main renderer router. +- Keep `WindowSideBar` and `AppBar` as the app shell around the Plugins page. +- Reuse current clients/stores/components where they already own behavior. +- Treat Remote channels as renderer-only virtual plugin cards backed by existing `remoteControl.*` routes. +- Hide Settings navigation entries for Plugins-owned areas, while keeping compatibility redirects. +- Update the main sidebar expanded layout only; keep collapsed behavior unchanged. + +No data migration is required. + +## Main Route Architecture + +```text +Main window + App.vue + AppBar + WindowSideBar + RouterView + /chat -> ChatTabView + /welcome -> WelcomePage + /plugins -> PluginsHubPage + /plugins + /plugins/skills + /plugins/mcp + /plugins/:pluginId +``` + +Use the existing `src/renderer/src/router/index.ts`. Do not add `src/renderer/plugins`, a new Vite +entry, or a new BrowserWindow. 
+ +Route names can be: + +```text +plugins +plugins-skills +plugins-mcp +plugins-detail +``` + +External/main-process callers should not know UI component internals. Reuse the existing app-runtime event path where possible. Only add a narrow route if a future main-process caller needs generic main-window navigation: + +```text +system.openMainRoute({ routeName: 'plugins-mcp', params? }) -> { focused: boolean } +``` + +For the first increment, MCP install deeplinks reuse `DEEPLINK_EVENTS.MCP_INSTALL` and the main app deeplink handler routes the renderer to `/plugins/mcp`. Do not add a Plugins-specific window route. + +## Affected Boundaries + +| Boundary | Required change | +| --- | --- | +| `src/renderer/src/router/index.ts` | Add `/plugins` route family | +| `src/renderer/src/App.vue` | Keep existing shell; ensure `/plugins` receives same global overlays/theme/i18n | +| `WindowSideBar.vue` | Add expanded command list and route Plugins row to `/plugins` | +| `renderer/api` | Reuse existing clients; add a main-window navigation client only if a generic main-process caller appears | +| `shared/contracts/routes` | Add narrow focus/navigate route only if deeplink/main process cannot use existing event path | +| Settings renderer | Remove/hide Plugins-owned nav entries and overview links | +| Deeplink presenter | Route MCP install deeplink to main `/plugins/mcp` page | +| Plugin presenter | Stop using per-plugin BrowserWindow as primary UI path | + +## Data Ownership + +Do not create a persisted unified plugin table. + +Use a renderer-only union for cards: + +```text +CatalogItem = + official plugin item from plugins.list + Remote virtual item from remoteControl.listChannels + status +``` + +`MCP` and `Skills` are top-level sibling tabs under `/plugins`, not catalog cards. This union only drives plugin catalog rendering and search filtering. 
Writes go back to the current owner: + +| User action | Owner route/client | +| --- | --- | +| Enable official plugin | `PluginClient.enablePlugin` | +| Disable official plugin | `PluginClient.disablePlugin` | +| CUA runtime/permission action | `PluginClient.invokeAction` existing runtime actions | +| MCP add/edit/toggle | `McpClient` / `useMcpStore` existing paths | +| Skill install/edit/delete/sync | `SkillClient` / `SkillSyncClient` / `useSkillsStore` | +| Remote enable/save/pair/remove binding | `RemoteControlClient` | + +## Page Shell + +Create the Plugins UI under the existing renderer: + +```text +src/renderer/src/pages/plugins/ +├── PluginsHubPage.vue +├── PluginsCatalogPage.vue +├── OfficialPluginDetailPage.vue +├── McpPluginsPage.vue +├── SkillsPluginsPage.vue +├── components/ +│ ├── PluginsTopTabs.vue +│ ├── PluginCatalogGrid.vue +│ ├── PluginCatalogCard.vue +│ ├── AddedPluginsStrip.vue +│ └── PluginSearchBar.vue +└── composables/ + ├── usePluginCatalog.ts + └── useRemotePluginItems.ts +``` + +Keep this list flexible during implementation; do not split files unless the component becomes hard to read. + +Visual baseline: + +- Main content starts with top tabs (`Plugins`, `Skills`, `MCP`). +- Catalog page uses the Codex-like layout: title, subtitle, search, added strip, segmented filters, sectioned list. +- Catalog cards include official plugins and Remote virtual plugins; MCP and Skills remain reachable through top tabs. +- Remote does not have a top tab or product list route. Each channel opens as a virtual plugin detail. +- Avoid settings-style full-width form pages for the catalog. Detail routes may use denser settings sections. +- Cards are individual repeated items only. Do not put page sections inside floating cards. 
+ +## Route and Navigation Behavior + +Renderer-side navigation: + +| Trigger | Behavior | +| --- | --- | +| Sidebar `Plugins` row | `router.push({ name: 'plugins' })` | +| Top tab `Skills` | `router.push({ name: 'plugins-skills' })` | +| Top tab `MCP` | `router.push({ name: 'plugins-mcp' })` | +| Plugin card/detail | `router.push({ name: 'plugins-detail', params: { pluginId } })` | +| Remote channel card/detail | `router.push({ name: 'plugins-detail', params: { pluginId: 'remote:<channel>' } })` | +| `New Chat` row while on `/plugins` | `router.push({ name: 'chat' })`, then start new conversation | + +Main-process initiated navigation: + +- MCP install deeplink must focus the main window and route to `/plugins/mcp`. +- Historical Settings route redirects can focus main and route to the matching `/plugins...` route. +- If the main window does not exist, create/focus the normal app window, not a Plugins window. + +## Official Plugin Detail + +Current behavior opens `PluginPresenter.openPluginSettingsWindow(pluginId)`. + +Target behavior: + +- List page opens `/plugins/:pluginId`. +- Detail page loads `plugins.get(pluginId)`. +- Enable/disable remains in detail and list. +- Runtime status and MCP status remain visible. +- Known first-party plugin actions are exposed as native detail sections: + - `runtime.getStatus` + - `runtime.checkPermissions` + - `runtime.openPermissionGuide` + +Do not add a generic embedded HTML settings host in the first increment. Current shipped plugins are first-party (`cua`, `feishu`), so native Vue detail pages are enough and safer than enabling arbitrary webview/iframe behavior. + +Legacy fallback: + +- Keep `settingsContributions` in manifests during migration. +- Keep `settings.open` action available only as a temporary compatibility path if some old package still calls it. +- The first-party UI must not call `settings.open`. 
+ +When to add a generic plugin settings host: + +- Only when third-party plugin settings contributions are a supported product requirement. +- Use an isolated child WebContents/WebContentsView with the plugin-specific preload, not an iframe without preload. +- Keep external navigation denied. + +## MCP Migration + +`McpSettings.vue` already owns most behavior. Move by reuse, not rewrite. + +Recommended first pass: + +- Create `McpPluginsPage.vue`. +- Import/reuse `McpServers`, `McpBuiltinMarket`, NPM registry controls, guide overlay only if still needed. +- Preserve current route query shape for market view inside Plugins (`/plugins/mcp?view=market`). +- Move deeplink handler from Settings bootstrap to main app or Plugins page bootstrap for MCP install. + +Compatibility: + +- `deepchat://mcp/install` focuses main window and routes to `/plugins/mcp`. +- Hidden `settings-mcp` route can redirect/open main `/plugins/mcp` during transition. + +Settings cleanup: + +- Remove visible `settings-mcp` navigation item. +- Remove MCP Overview metric. +- Remove `start-mcp` quick task or replace it with a non-Plugins Settings task. + +## Skills Migration + +`SkillsSettings.vue` can become a Plugins page with minimal changes: + +- Rename/wrap visually as `SkillsPluginsPage`. +- Keep `SkillCard`, `SkillInstallDialog`, `SkillEditorSheet`, `SkillSyncDialog`, `SyncStatusSection`. +- Keep draft suggestion toggle. +- Keep first-launch sync prompt if product still relies on it. + +Avoid duplicating the skills store or install logic. + +Compatibility: + +- Hidden `settings-skills` route should route the main window to `/plugins/skills`. +- Settings Overview search should not list Skills. + +## Remote Migration + +First increment: reuse `RemoteSettings.vue` inside `/plugins/:pluginId` for virtual plugin ids such as `remote:telegram`. The detail shell owns the plugin-style enable/disable button, while single-channel mode hides the old Remote tab strip and the embedded channel toggle. 
Feishu/Lark Integration uses the same hide-toggle mode so its top-level plugin enable button controls both the official plugin and Feishu/Lark Remote. This keeps the existing credential, pairing, default agent/workdir, bindings and WeChat iLink behavior intact. + +Follow-up refactor: extract channel sections from `RemoteSettings.vue` into reusable components. The file is already large, but splitting it before moving the route would increase regression risk and delay the user-visible entry-point cleanup. + +Refactor only around real channel boundaries: + +```text +PluginsCatalogPage + -> virtual cards from listRemoteChannels() +PluginDetailPage(remote:<channel>) + -> channel header/status/toggle + -> credentials section + -> default agent/workdir section + -> pairing section when supportsPairing + -> bindings section + -> channel-specific section +``` + +Suggested extracted components: + +| Component | Scope | +| --- | --- | +| `RemotePluginCard` | card summary for one channel | +| `PluginDetailPage(remote:<channel>)` | detail shell and save status | +| `RemoteCredentialsSection` | token/app secret fields; channel-specific props | +| `RemoteDefaultsSection` | default agent and default workdir | +| `RemotePairingSection` | pair code and principals for pairable channels | +| `RemoteBindingsSection` | bound chats/groups/topics | +| `WeixinIlinkAccountsSection` | WeChat iLink login/account controls | + +Keep shared logic tiny: + +- load channel settings +- save channel settings +- load channel status +- load bindings/pairing + +Do not invent a generic form schema for all channels. 
+ +Virtual item mapping: + +```text +remote:<channel> + kind: remote + title: channel title + ' Remote' when needed + description: descriptor.descriptionKey + enabled: status.enabled + state: status.state + detailRoute: /plugins/:pluginId +``` + +## Settings Removal and Redirects + +Change visible navigation source: + +- Remove or mark hidden: + - `settings-mcp` + - `settings-remote` + - `settings-plugins` + - `settings-skills` + +Because `settingsNavigation.ts` is the single source for Settings sidebar and Overview search, this should remove most visible Settings entries without scattered conditions. + +Route compatibility options: + +1. Keep hidden route items so old route names still exist. +2. When entered, focus main window and navigate to the mapped `/plugins...` route. +3. Do not show these items in Settings sidebar/search. + +Mapping: + +| Old Settings route | Main window target | +| --- | --- | +| `settings-mcp` | `/plugins/mcp` | +| `settings-remote` | `/plugins` | +| `settings-plugins` | `/plugins` | +| `settings-skills` | `/plugins/skills` | + +`settings-acp` stays in Settings for this feature. ACP is an agent/provider configuration surface, not part of the four requested Plugins-owned areas. + +## Sidebar Implementation Plan + +Current `WindowSideBar.vue` should not be rewritten. Modify the expanded right column header area. 
+ +Before: + +```text +right column +├── header row: selectedAgentName + group toggle + plus +├── search input +├── pinned section +└── session groups +``` + +After: + +```text +right column +├── title row: selectedAgentName +├── command list +│ ├── New Chat +│ ├── Search +│ └── Plugins +├── blank spacer +├── pinned section when non-empty +├── Chat group +├── 工作区 header + existing group-mode/sort toggle +└── project groups +``` + +Command behavior: + +| Row | Existing behavior to call | +| --- | --- | +| New Chat | `router.push({ name: 'chat' })` then `sessionStore.startNewConversation({ refresh: true })` | +| Search | `spotlightStore.toggleSpotlight()` | +| Plugins | `router.push({ name: 'plugins' })` | + +Keep: + +- Agent icon rail. +- Settings/theme/sidebar controls in the existing left rail. +- collapsed width and transitions. +- session pagination and fill checks. +- pinned collapse behavior. +- project grouping/reorder behavior. +- existing group-mode/sort behavior, moved to the `工作区` header. +- shortcut badge logic for sessions. + +Question the old inline session search: + +- First increment should remove the inline search input from expanded sidebar to match the requested command-list shape. +- Search row opens Spotlight, which already searches sessions/messages/settings/actions. +- If users later need local-only filtering, add it inside Spotlight or as a session-list filter command, not as a second persistent input. + +Right column ordering: + +```text +所有 Agents + +New Chat +Search +Plugins + +Pinned (only if any) +... +Chat +... +工作区 [group/sort toggle] +project groups +... +``` + +Do not add Settings, theme, collapse, remote status or other rail controls into this right column. 
+ +## Deeplinks and External Entry Points + +Update callers: + +| Current caller | New behavior | +| --- | --- | +| MCP install deeplink | focus main window, route to `/plugins/mcp`, dispatch MCP install event there | +| Settings sidebar old MCP/Skills/Plugins/Remote | no visible entry | +| Settings activity old route | focus main window and route to matching `/plugins...` page | +| Sidebar remote status button | route to the first enabled `remote:<channel>` plugin detail | +| Chat input MCP indicator `openSettings` text | route to `/plugins/mcp` | + +Provider install deeplink stays in Settings Provider. Do not route provider/model setup to Plugins. + +## i18n + +Add route/page labels: + +- `routes.plugins` +- `pluginsHub.title` +- `pluginsHub.subtitle` +- `pluginsHub.searchPlaceholder` +- `pluginsHub.tabs.plugins` +- `pluginsHub.tabs.skills` +- `pluginsHub.tabs.mcp` +- `pluginsHub.tabs.remote` +- sidebar command labels if existing `common.newChat` and spotlight labels are not enough. + +Avoid moving existing `settings.mcp`, `settings.skills`, `settings.remote` keys in the first increment. Reuse them from Plugins pages to keep the diff smaller. Later cleanup can rename namespaces if the old naming becomes misleading. + +## Testing Strategy + +Small checks with high signal: + +| Area | Tests | +| --- | --- | +| Main router | `/plugins` and child routes render inside app shell | +| Settings navigation | removed entries do not appear in `getSettingsNavigationGroups`; hidden redirects still resolve | +| Sidebar | expanded command rows render; collapsed state unchanged; Plugins row routes to `/plugins` | +| Remote virtual items | descriptors + status produce cards; detail saves via `remoteControl.saveChannelSettings` | +| Official plugin detail | list/detail enable-disable; settings button no longer calls `settings.open` | +| Deeplink | MCP install focuses main and routes to `/plugins/mcp` | + +Manual visual QA: + +- macOS light/dark with app sidebar and main content. 
+- Windows light/dark with main app shell. +- Linux opaque backgrounds. +- Narrow width with collapsed and expanded sidebar. +- Long remote token/error/path strings. +- Chinese and English labels. + +Final implementation gates: + +```bash +pnpm run format +pnpm run i18n +pnpm run lint +pnpm run typecheck +``` + +Renderer tests should be run for touched components. Full app smoke test should open Chat, Plugins and Settings separately. + +## Risks and Mitigations + +| Risk | Mitigation | +| --- | --- | +| Settings routes are used by deeplinks/onboarding | Keep hidden compatibility routes and redirect to main `/plugins...` | +| RemoteSettings monolith makes migration risky | Reuse it in single-channel mode; extract per-channel sections only when needed | +| Plugin settings HTML depends on plugin preload | Do not embed arbitrary HTML in first increment; build first-party native details | +| Feishu official plugin vs Feishu Remote naming collision | Merge Feishu Remote into the Feishu/Lark Integration detail page | +| Plugins page becomes another Settings | Catalog page stays Codex-like; detail pages are dense only where settings are unavoidable | +| Main route conflicts with chat internal `pageRouter` | Use Vue router for `/plugins`; keep `pageRouter` scoped to ChatTabView | +| Search behavior confusion | Sidebar Search row opens existing Spotlight; do not add a new search engine | + +## Rollout Plan + +1. Land `/plugins` main route skeleton with top tabs and catalog placeholder. +2. Move visible Settings entries out, with redirects to main Plugins routes. +3. Move MCP page and deeplink. +4. Move Skills page. +5. Add official plugin native list/detail and stop first-party UI from opening plugin settings windows. +6. Add Remote virtual plugin list/detail. +7. Update main sidebar expanded command list. +8. Run visual QA and clean up i18n/tests. + +This order keeps each PR reviewable and avoids breaking every surface at once. 
diff --git a/docs/features/plugins-hub/spec.md b/docs/features/plugins-hub/spec.md new file mode 100644 index 000000000..64376e7a6 --- /dev/null +++ b/docs/features/plugins-hub/spec.md @@ -0,0 +1,368 @@ +# Plugins Hub Specification + +## User Need + +DeepChat 的 Settings 里混入了高频工具能力、扩展能力和系统偏好。用户想管理 Skills、MCP +servers、official Plugins 和 Remote control channels 时,不应该打开 Settings,也不应该弹一个新的 +插件设置窗口。 + +本目标是把插件型能力移动到主窗口的一级页面和子路由中,形态参考 Codex 的主窗口 Plugins 页面: +左侧仍是主窗口 sidebar,右侧主内容区显示 `Plugins` 页面、顶部 tabs、搜索、已添加项和推荐项。 + +## Product Position + +`Plugins` 是主窗口里的扩展能力页面,不是独立 BrowserWindow,也不是 Settings 的子页面。 + +| Capability | 在主窗口 Plugins 页面中的定位 | 数据事实源 | +| --- | --- | --- | +| Official Plugins | 可启停的 DeepChat first-party plugin package,例如 CUA、Feishu/Lark Integration | `PluginPresenter` + `plugins.*` routes | +| MCP | 工具 server 管理、market、global MCP enablement | `McpPresenter` / `useMcpStore` | +| Skills | agent skill 管理、导入导出、sync、draft suggestion | `SkillPresenter` / `SkillSyncPresenter` / `useSkillsStore` | +| Remote | Telegram、Feishu/Lark、QQBot、Discord、WeChat iLink 作为 virtual plugin card | `RemoteControlPresenter` + `remoteControl.*` routes | + +Remote channel 是 Plugins UI 里的 virtual plugin,不是 `.dcplugin` 安装包。这个建模只改变用户入口和 +展示方式,不改变 remote control 的配置存储、runtime 生命周期或消息协议。 + +## Goals + +- 新增主窗口 route:`/plugins`。 +- 在主窗口主内容区集中管理 Official Plugins、MCP、Skills、Remote。 +- 主窗口 sidebar 展开态右栏显示 `New Chat`、`Search`、`Plugins` command list,点击 `Plugins` 进入 `/plugins`。 +- `Plugins` command 下方留出空行,再显示 `Pinned`、`Chat`、`工作区` 和 project groups。 +- Settings 等底部 app controls 继续留在现有左侧 rail,不进入展开右栏。 +- `Plugins` 页面保留左侧 sidebar,右侧内容区切换,不创建新窗口。 +- Remote 每个 implemented channel 都作为 plugin-like card 出现,并能进入该 channel 的详情子路由。 +- Remote 设置页不再在 Settings 中展示;从列表进入详情时使用主窗口 Plugins 子路由。 +- Official plugin 的详情和设置入口不再弹出 per-plugin BrowserWindow;从列表进入详情时使用主窗口 Plugins 子路由。 +- Settings 侧边栏、Settings Overview 搜索和 quick entry 不再展示 Skills、MCP、Plugins、Remote。 +- 保留 Settings 内部旧 route 的兼容能力,避免 deeplink、onboarding 或历史入口直接 
404。 +- `所有 Agents` 标题保留。 +- UI 需要在 macOS、Windows、Linux 以及窄窗口下保持可用和美观。 + +## Non-Goals + +- 不做第三方 plugin marketplace。 +- 不把 Remote channel 改造成真实 `.dcplugin` 包。 +- 不迁移 provider、model、DeepChat Agents、ACP Agents、prompt、memory、knowledge、data、shortcut、about 等系统设置。 +- 不新增 Automations 入口;参考截图中有 Automations,但本目标只做 `New Chat`、`Search`、`Plugins`。 +- 不新增独立 Plugins BrowserWindow。 +- 不新增 `src/renderer/plugins` 独立 renderer entry。 +- 不重写 MCP、Skill、Remote、Plugin presenter。 +- 不新增统一持久化表来存一个“大插件模型”。 +- 不改变 existing Remote commands、pairing protocol、channel binding behavior。 +- 不改变 existing MCP server config schema、Skill sidecar schema 或 plugin manifest schema,除非内嵌设置页确实需要最小 route 补充。 + +## Current State + +Relevant current files: + +| Area | Current files | +| --- | --- | +| Main app shell | `src/renderer/src/App.vue`, `src/renderer/src/router/index.ts` | +| Main sidebar | `src/renderer/src/components/WindowSideBar.vue`, `src/renderer/src/stores/ui/sidebar.ts` | +| Chat page internal route state | `src/renderer/src/stores/ui/pageRouter.ts`, `src/renderer/src/views/ChatTabView.vue` | +| Settings shell and navigation | `src/renderer/settings/App.vue`, `src/renderer/settings/main.ts`, `src/shared/settingsNavigation.ts` | +| Settings window lifecycle | `src/main/presenter/windowPresenter/index.ts`, `src/shared/contracts/routes/system.routes.ts` | +| Plugins settings page | `src/renderer/settings/components/PluginsSettings.vue`, `src/renderer/api/PluginClient.ts`, `src/shared/contracts/routes/plugins.routes.ts` | +| MCP settings page | `src/renderer/settings/components/McpSettings.vue`, `src/renderer/src/components/mcp-config/**`, `src/renderer/src/stores/mcp.ts` | +| Skills settings page | `src/renderer/settings/components/skills/SkillsSettings.vue`, `src/renderer/src/stores/skillsStore.ts` | +| Remote settings page | `src/renderer/settings/components/RemoteSettings.vue`, `src/renderer/api/RemoteControlClient.ts` | + +Important current constraints: + +- Main window Vue router currently 
exposes `/chat` and `/welcome`. +- Main shell already keeps `WindowSideBar` outside `RouterView`, so adding `/plugins` naturally preserves the sidebar. +- Settings navigation is centralized in `src/shared/settingsNavigation.ts`. +- Settings routes are generated from navigation items in `src/renderer/settings/main.ts`. +- `system.openSettings` only accepts `SettingsRouteNameSchema`. +- MCP install deeplinks currently open Settings and send `DEEPLINK_EVENTS.MCP_INSTALL`. +- Plugin settings currently call `plugins.invokeAction({ actionId: 'settings.open' })`, which opens a per-plugin BrowserWindow. +- Remote channels already expose `RemoteChannelDescriptor`, status, settings, bindings and pairing through typed routes. + +## Proposed Main Route Structure + +```text +src/renderer/src/router/index.ts +├── /chat +├── /welcome +└── /plugins + ├── tab=plugins or child /plugins + ├── /plugins/skills + ├── /plugins/mcp + └── /plugins/:pluginId +``` + +Implementation can use nested Vue routes or one `/plugins` route with internal tab state. The URL must be shareable enough for internal navigation and redirects: + +| Target | Required addressable route | +| --- | --- | +| Plugins catalog | `/plugins` | +| Skills | `/plugins/skills` | +| MCP | `/plugins/mcp` | +| Plugin detail | `/plugins/:pluginId` | + +Legacy `/plugins/official/:pluginId`, `/plugins/remote` and `/plugins/remote/:channel` paths may redirect for compatibility, but they are not product routes. + +## Proposed Information Architecture + +Top-level sections: + +| Section | User label | Contents | +| --- | --- | --- | +| Plugins | Plugins | official plugin packages, added/recommended plugin cards, Remote virtual plugin cards | +| Skills | Skills | installed skills, install, edit, sync import/export, draft suggestion toggle | +| MCP | MCP Servers | user MCP servers, plugin-owned MCP status, MCP market/add flow | + +The visual top tab row uses `Plugins`, `Skills` and `MCP`. 
`Remote` is not a top tab; each remote channel is a virtual plugin card in the catalog. + +Remote virtual plugin ids use `remote:`. Feishu/Lark is special: when the official Feishu/Lark Integration plugin is installed, the Feishu/Lark Remote card is merged into that official plugin detail page. + +Historical remote settings compatibility: if a channel has credentials/accounts from an older configuration and no explicit enabled flag, that virtual plugin starts enabled by default. Explicit `enabled: false` still stays disabled. + +## Main Window Plugins UX + +### Desktop Layout + +```text +┌──────────────────────────────────────────────────────────────────────────────┐ +│ AppBar │ +├───────────────┬──────────────────────────────────────────────────────────────┤ +│rail│ expanded sidebar │ [Plugins] [Skills] [MCP] + ↻ │ +│ │ 所有 Agents │ │ +│ │ New Chat │ Plugins │ +│ │ Search │ Work with DeepChat across your favorite tools │ +│ │ Plugins │ ┌────────────────────────────────────────────┐ │ +│ │ │ │ Search plugins and remote channels... │ │ +│ │ Pinned │ └────────────────────────────────────────────┘ │ +│ │ ... │ │ +│ │ Chat │ Added Manage │ +│ │ ... │ [CUA] [Feishu] [Telegram] [Skill pack] │ +│ │ 工作区 [sort]│ │ +│ │ project A │ Featured │ +│ │ project B │ Computer Use Add Chrome Add │ +│⚙︎ │ │ Spreadsheets ... Presentations ... │ +└────┴──────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +### Detail Route Layout + +```text +┌──────────────────────────────────────────────────────────────────────────────┐ +│ AppBar │ +├───────────────┬──────────────────────────────────────────────────────────────┤ +│rail│ expanded sidebar │ [Plugins] [Skills] [MCP] │ +│ │ 所有 Agents │ ← Back to Plugins │ +│ │ New Chat │ Telegram Remote on/off │ +│ │ Search │ Status: running · bindings: 2 · last error: none │ +│ │ Plugins │ │ +│ │ │ ┌ Credentials ────────────────────────────────────┐ │ +│ │ Pinned │ │ Bot token / app credentials │ │ +│ │ ... 
│ └─────────────────────────────────────────────────┘ │ +│ │ Chat │ ┌ Remote Control ────────────────────────────────┐ │ +│ │ 工作区 [sort]│ │ Default agent · Default workdir · Pairing │ │ +│ │ project A │ └─────────────────────────────────────────────────┘ │ +│⚙︎ │ │ ┌ Bindings ──────────────────────────────────────┐ │ +│ │ │ │ Existing chats/channels and remove actions │ │ +│ │ │ └─────────────────────────────────────────────────┘ │ +└────┴──────────────────┴──────────────────────────────────────────────────────┘ +``` + +### Narrow Main Window Layout + +At constrained widths, keep the same app shell and avoid modal navigation: + +```text +┌────────────────────────────────────┐ +│ AppBar │ +├────┬───────────────────────────────┤ +│rail│ [Plugins][Skills][MCP] │ +│⚙︎ ├───────────────────────────────┤ +│ │ Search │ +│ │ │ +│ │ Card list / Detail page │ +│ │ │ +└────┴───────────────────────────────┘ +``` + +Narrow behavior: + +- If sidebar is collapsed, it stays collapsed. +- Section navigation becomes a wrapped horizontal tab row. +- Detail pages keep a top back button. +- Long tokens, paths and errors must truncate with tooltip or wrap in a controlled block. +- Forms use one column. + +## Sidebar UX + +### Target Expanded Shape + +```text +┌────┬──────────────────────────────┐ +│rail│ 所有 Agents │ +│ │ │ +│ │ ┌──────────────────────────┐ │ +│ │ │ ✎ New Chat ⌘N │ │ +│ │ │ 🔍 Search ⌘P │ │ +│ │ │ ⌘ Plugins │ │ +│ │ └──────────────────────────┘ │ +│ │ │ +│ │ Pinned (if any) │ +│ │ ... │ +│ │ Chat │ +│ │ ... │ +│ │ 工作区 [sort] │ +│ │ project groups │ +│⚙︎ │ ... │ +└────┴──────────────────────────────┘ +``` + +Notes: + +- `New Chat` starts a new conversation through the existing session store path and navigates to `/chat` if needed. +- `Search` opens existing Spotlight/search behavior; it is not a second search implementation. +- `Plugins` routes the current main window to `/plugins`. +- There is a blank spacer after `Plugins` before the conversation sections. 
+- `Pinned` is rendered only when pinned sessions exist. +- `Chat` remains the unprojected/default chat group. +- `工作区` is a section title for project groups; the existing group-mode/sort toggle moves to this row. +- Shortcut badges display only for existing registered shortcuts. Do not add a new shortcut just to fill the badge. +- The existing collapsed rail stays visually and behaviorally unchanged. +- The existing Agent icon rail remains the collapsed-state affordance; no new collapsed Plugins icon is added. +- Settings, theme and other bottom controls stay in the existing left rail. They are not listed under `Plugins` in the expanded right column. +- The old header new-chat plus button is removed as a competing primary action. The existing group-mode/sort control moves to the `工作区` header. + +### Collapsed Shape + +Collapsed state remains current: + +```text +┌────┐ +│ ◎ │ all agents / agent icons +│ .. │ +│ 🔍 │ existing search affordance +│ .. │ existing status/theme/sidebar/settings affordances +└────┘ +``` + +## Settings UX + +Settings remains for system/model/account/data preferences: + +```text +Settings +├── Overview +├── Common +├── Display +├── Environments +├── Providers +├── DeepChat Agents +├── ACP +├── Notifications / Hooks +├── Scheduled Tasks +├── Prompt +├── Memory +├── Knowledge Base +├── Database +├── Shortcuts +└── About +``` + +Removed from visible Settings navigation: + +- MCP +- Remote +- Plugins +- Skills + +Compatibility behavior: + +- Existing internal settings routes can remain hidden during migration. +- Direct navigation to removed route names should focus the main window and route to the matching `/plugins...` page when possible. +- Settings Overview search should not list hidden Plugins-owned entries. +- Settings activity records can keep historical `routeName` values; opening them should redirect to Plugins when the route is now Plugins-owned. 
+ +## Acceptance Criteria + +### Main Window Route + +- `/plugins` renders inside the existing main app shell and keeps `WindowSideBar` visible. +- Opening Plugins from the main sidebar navigates the current main window to `/plugins`. +- No new Plugins BrowserWindow is created. +- No new `src/renderer/plugins` renderer entry is added. +- `AppBar`, sidebar, theme, language direction and global overlays continue to work. +- The page has stable responsive behavior and remains usable at narrow widths. +- User-facing strings use i18n keys. + +### Official Plugins + +- Official plugin list keeps enable/disable/status behavior. +- Plugin-owned MCP errors remain visible. +- Opening a plugin settings/detail uses `/plugins/:pluginId`, not Settings and not a per-plugin BrowserWindow. +- CUA detail includes runtime/MCP status, permission checks and permission guide actions. +- Remote virtual plugin detail pages use the same top-level enable/disable button style as official plugin details. The embedded remote form must not show a second channel toggle. +- Feishu/Lark Integration detail includes Feishu/Lark Remote configuration instead of showing a separate Feishu/Lark Remote card. +- Feishu/Lark Integration has one top-level enable/disable control; it enables/disables both the official plugin and the embedded Feishu/Lark Remote configuration. The embedded remote form must not show a second channel toggle. +- Legacy `settings.open` plugin action is not used as the primary UI path after migration. + +### MCP + +- MCP global enablement, server list, add/edit, market view and NPM registry controls remain available from Plugins. +- Plugin-owned MCP servers remain read-only where their owning plugin controls lifecycle. +- MCP install deeplinks focus the main window and route to `/plugins/mcp` instead of opening Settings. + +### Skills + +- Installed Skills list, search, install, edit, delete, sync import/export and draft suggestion toggle remain available from Plugins. 
+- First-launch sync prompt remains available if it is still part of the current product flow. +- Skill drag/drop and URL/zip/folder install behavior remains unchanged. + +### Remote + +- Every implemented `RemoteChannelDescriptor` appears as a Remote virtual plugin card. +- Each card shows enabled state, runtime state, binding/pairing summary and last error when present. +- Each card opens `/plugins/:pluginId`; remote virtual plugin ids use `remote:<channel>`. +- Channel settings render inside the plugin detail page and preserve current behavior: + - credentials + - enable/disable + - default agent + - default workdir + - pairing + - bindings/principals + - channel-specific login/account controls for WeChat iLink +- Saving a channel setting still uses `remoteControl.saveChannelSettings`. +- Remote status indicator in the main sidebar continues to work. + +### Main Sidebar + +- Expanded sidebar shows `New Chat`, `Search`, `Plugins` before pinned sessions. +- Expanded sidebar inserts a blank spacer after `Plugins`. +- Expanded sidebar shows `Pinned` only when pinned sessions exist, then `Chat`, then `工作区`. +- The existing group-mode/sort toggle is placed on the `工作区` header row. +- `所有 Agents` remains visible. +- Settings and other bottom controls remain in the existing left rail, not in the expanded right column. +- Collapsed sidebar keeps current visual shape and behavior. +- No new collapsed icon button is added. +- Session list pagination, pinned section, project grouping, drag reorder and keyboard shortcut badges continue working. + +### Settings Removal + +- Settings sidebar no longer shows MCP, Remote, Plugins or Skills. +- Settings Overview search no longer returns MCP, Remote, Plugins or Skills as settings pages. +- Settings Overview no longer uses MCP as one of the primary system metrics or quick-start tasks. +- Existing app code that opens `settings-mcp`, `settings-remote`, `settings-plugins` or `settings-skills` is migrated or redirected to `/plugins...`. 
+ +## Platform and Accessibility Requirements + +- Keyboard navigation works across top tabs, search, card list, detail forms and back navigation. +- `Esc` closes transient dialogs only; it does not leave `/plugins`. +- `Tab` order follows visual order. +- Buttons and icon-only controls have accessible labels. +- Status colors are not the only status signal; labels must remain visible. +- Long file paths, tokens, error strings and command lines do not overflow their container. +- Remote credentials remain password inputs by default, preserving current reveal behavior. +- Linux and Windows backgrounds must not rely on macOS-only materials. +- RTL languages should inherit existing app i18n direction handling. + +## Open Questions + +None. diff --git a/docs/features/plugins-hub/tasks.md b/docs/features/plugins-hub/tasks.md new file mode 100644 index 000000000..a2133d0fd --- /dev/null +++ b/docs/features/plugins-hub/tasks.md @@ -0,0 +1,130 @@ +# Plugins Hub Tasks + +## 0. Review Gate + +- [x] Review `spec.md` with product/maintainers. +- [x] Review `plan.md` main-route architecture, route compatibility, and sidebar layout. +- [x] Confirm no unresolved clarification markers exist before implementation. +- [ ] Keep this SDD folder active until the feature lands or is deliberately abandoned. + +## 1. Main Route Skeleton + +- [x] Add `/plugins` route family to the existing main renderer router. +- [x] Add `PluginsHubPage.vue` inside `src/renderer/src/pages/plugins/`. +- [x] Add top tab navigation for Plugins, Skills and MCP. +- [x] Add Codex-like catalog placeholder with title, subtitle, search, added strip and featured sections. +- [x] Keep MCP and Skills as top tabs only, not plugin catalog cards. +- [x] Keep `WindowSideBar`, `AppBar`, global overlays, theme and i18n behavior intact. +- [x] Add i18n keys for route, page title, subtitle, tabs and search placeholder. +- [ ] Add renderer tests proving `/plugins` renders inside the existing app shell. + +## 2. 
Main-Process Navigation Compatibility + +- [x] Reuse existing deeplink event handling for main-process initiated MCP navigation. +- [x] Ensure MCP install deeplink can focus/create the normal main window and navigate to `/plugins/mcp`. +- [x] Do not add a Plugins BrowserWindow. +- [x] Do not add `src/renderer/plugins` or a separate renderer entry. +- [ ] Add tests for focusing main and navigating to `/plugins/mcp`. + +## 3. Settings Navigation Cleanup + +- [x] Hide or remove visible Settings navigation items for MCP, Remote, Plugins, and Skills. +- [x] Keep compatibility routes or redirect handlers for old route names. +- [ ] Map every old route name to main `/plugins...` routes. +- [x] Remove MCP from Settings Overview primary metric. +- [x] Remove or replace Settings Overview `start-mcp` quick task. +- [x] Ensure Settings Overview search does not return hidden Plugins-owned pages. +- [ ] Update Settings activity click behavior for historical routes. +- [ ] Add tests for Settings navigation groups and hidden route handling. + +## 4. MCP Section + +- [x] Create `/plugins/mcp` page using current MCP store/client behavior. +- [x] Reuse `McpSettings`/current MCP components for list/add/edit/toggle. +- [x] Reuse MCP market view inside `/plugins/mcp?view=market`. +- [x] Reuse NPM registry controls. +- [x] Move MCP install deeplink target from Settings to main `/plugins/mcp`. +- [x] Move MCP install event handling into the main app or Plugins route bootstrap. +- [x] Keep plugin-owned MCP server read-only behavior. +- [ ] Add tests for deeplink route target and MCP page render. + +## 5. Skills Section + +- [x] Create `/plugins/skills` page from current Skills settings behavior. +- [x] Reuse skill list, search, install, edit, delete, sync import/export. +- [x] Preserve draft suggestion toggle. +- [x] Preserve first-launch sync prompt if still required. +- [x] Ensure skill dialogs/sheets fit the main Plugins page shell. 
+- [ ] Add renderer tests for empty/list/search/install entry behavior. + +## 6. Official Plugins Section + +- [x] Create official plugin list route from `PluginClient.listPlugins`. +- [x] Add unified detail route `/plugins/:pluginId`. +- [x] Keep enable/disable actions. +- [x] Show runtime status, plugin-owned MCP status and last errors. +- [ ] Add native CUA detail sections for runtime status, permissions and permission guide actions. +- [x] Merge Feishu/Lark Remote configuration into the Feishu/Lark Integration detail page. +- [x] Use the Feishu/Lark Integration top-level enable/disable button to control both the official plugin and Feishu/Lark Remote. +- [x] Stop first-party Plugins UI from calling `settings.open`. +- [x] Keep `settings.open` only as temporary compatibility fallback. +- [ ] Add tests for list/detail action behavior. + +## 7. Remote Virtual Plugins + +- [x] Build remote virtual cards from `remoteControl.listChannels`. +- [x] Fetch and display per-channel status. +- [x] Route remote virtual plugin cards through `/plugins/:pluginId` using `remote:` ids. +- [x] Remove the Remote top tab/product list route. +- [x] Reuse `RemoteSettings` in single-channel mode inside plugin detail pages. +- [x] Use the plugin detail top-level enable/disable button for remote virtual plugin state. +- [x] Auto-enable configured legacy channels when the explicit enabled flag is missing. +- [x] Preserve credentials fields and password reveal behavior. +- [x] Preserve enable/disable save behavior. +- [x] Preserve default agent and default workdir behavior. +- [x] Preserve pairing flow for Telegram, Feishu/Lark, QQBot and Discord. +- [x] Preserve binding/principal removal behavior. +- [x] Preserve WeChat iLink login/account controls. +- [x] Route sidebar remote status button to the first enabled remote plugin detail. +- [ ] Add tests for card mapping, save, pairing and bindings. + +## 8. 
Main Sidebar Layout + +- [x] Replace expanded sidebar header/search area with command list. +- [x] Keep `所有 Agents` title. +- [x] Wire `New Chat` row to navigate to `/chat` and start a new conversation. +- [x] Wire `Search` row to existing Spotlight behavior. +- [x] Localize the `Search` command label for Chinese locales. +- [x] Wire `Plugins` row to `router.push({ name: 'plugins' })`. +- [x] Add a blank spacer after the `Plugins` command row. +- [x] Render `Pinned` only when pinned sessions exist. +- [x] Keep the `Chat` group after `Pinned`. +- [x] Add `工作区` header before project groups. +- [x] Move the existing group-mode/sort toggle to the `工作区` header. +- [x] Keep Settings/theme/sidebar controls in the existing left rail, not in the expanded right column. +- [x] Display shortcut badges only for existing shortcuts. +- [x] Keep collapsed sidebar visual behavior unchanged. +- [x] Preserve session list pagination, pinned section, project grouping and reorder. +- [ ] Add renderer tests for expanded rows and collapsed state. +- [ ] Capture before/after ASCII blocks in PR description. + +## 9. Cross-Platform UI QA + +- [ ] Verify macOS light/dark with app shell and sidebar. +- [ ] Verify Windows light/dark with app shell and sidebar. +- [ ] Verify Linux opaque background. +- [ ] Verify narrow main window layout with expanded sidebar. +- [ ] Verify narrow main window layout with collapsed sidebar. +- [ ] Verify long paths/tokens/errors do not overflow. +- [ ] Verify keyboard navigation and focus order. +- [ ] Verify Chinese and English labels. + +## 10. Final Quality Gates + +- [x] Run `pnpm run format`. +- [x] Run `pnpm run i18n`. +- [x] Run `pnpm run lint`. +- [x] Run `pnpm run typecheck`. +- [ ] Run targeted renderer tests for Plugins route and sidebar. +- [ ] Run targeted main tests for navigation/deeplink behavior. +- [ ] Update durable docs or remove/archive active plan/tasks after implementation lands. 
diff --git a/docs/features/remote-feishu-lark-scan-auth/spec.md b/docs/features/remote-feishu-lark-scan-auth/spec.md new file mode 100644 index 000000000..ae4317cff --- /dev/null +++ b/docs/features/remote-feishu-lark-scan-auth/spec.md @@ -0,0 +1,57 @@ +# Remote Feishu/Lark Scan Authorization + +## User Need + +DeepChat already supports Feishu/Lark remote control through a manually configured self-built bot app. Users want a guided Kun-style setup that avoids local OAuth callback configuration and minimizes developer-console work. They also need the official PersonalAgent install link to be available in two explicit modes: open the official web page, or show an in-app QR code generated from the same install link. Manual `/pair` pairing and OAuth scan authorization should be presented as complementary ways to authorize remote-control users instead of disconnected flows. + +## Goals + +- Add an official Feishu/Lark PersonalAgent install flow that can obtain bot App ID/App Secret through Feishu/Lark authorization, without requiring users to configure a local OAuth redirect URI. +- Expose two install actions in Remote settings > Feishu/Lark: + - open the official web install page externally; + - show an in-app QR dialog whose QR payload is exactly the official `installUrl` returned by the install session. +- Keep the existing manual configuration fields: brand, App ID, App Secret, verification token, encrypt key, enable switch, default agent, default workdir, pair-code dialog, and bindings management. +- Present manual `/pair <code>` and OAuth scan authorization in the same Feishu/Lark user-authorization section: + - `/pair <code>` remains the universal bot-command pairing path; + - OAuth scan authorization remains an optional fallback for users who already configured App ID/App Secret.
+- Support both Feishu and Lark tenants: + - begin registration on `accounts.feishu.cn` for both brands so the QR launcher is accepted by Feishu/Lark clients; + - switch polling to `accounts.larksuite.com` only after `tenant_brand=lark` is detected without a secret. +- Auto-save returned PersonalAgent credentials into the existing Feishu settings shape and use the returned user `open_id` for pairing if the registration response provides one. + +## Acceptance Criteria + +1. In Remote settings > Feishu/Lark, users can still manually edit and save all existing credentials and remote-control settings. +2. Users can start an official PersonalAgent install session without entering App ID/App Secret first. +3. The install section has separate buttons for opening the official web page and for showing an in-app QR code. +4. The QR install button shows a dialog containing a QR code generated locally from the returned `installUrl`; it does not open the external browser automatically. +5. The web install button opens the returned `installUrl` externally and continues waiting for the same install result. +6. The install flow calls the official app registration endpoint with `action=begin`, `archetype=PersonalAgent`, `auth_method=client_secret`, and `request_user_info=open_id tenant_brand`, then polls with `action=poll` and `device_code`. +7. On successful install, DeepChat stores the returned `client_id` as App ID, `client_secret` as App Secret, and tenant brand as Feishu/Lark brand; secrets are never logged. +8. If the poll response includes an authorized user `open_id`, DeepChat adds it to `pairedUserOpenIds`; no user OAuth access token or refresh token is persisted. +9. The existing local-callback OAuth pairing flow must not be the primary setup path and must not require users to configure `http://127.0.0.1:32178/remote/feishu/auth/callback`. +10. 
Users can still use `/pair <code>` exactly as before, and the Feishu/Lark settings UI explains how `/pair` and OAuth scan authorization feed the same authorized-user list. +11. The UI presents the no-manual-callback install path first, with manual developer-console setup as an advanced/fallback path. +12. Cancelling or timing out a Feishu/Lark install or scan-authorization session prevents any later in-flight async response from writing credentials, adding paired users, or rebuilding the runtime. + +## Constraints + +- Do not weaken existing Feishu message authorization: group/topic messages still require a paired user and bot mention. +- Do not store Feishu user access tokens or refresh tokens in the remote-control config. +- Do not remove manual configuration paths. +- Do not log secrets, tokens, authorization codes, full QR URLs, or provider raw error bodies. +- Generate the install QR locally; do not send the install URL to a third-party QR service. +- Use typed routes and renderer API client methods rather than legacy presenter calls. +- Preserve unrelated local changes already present in the working tree. + +## Non-goals + +- Guaranteeing Feishu/Lark's undocumented PersonalAgent registration endpoint is stable or officially supported beyond observed Kun/SDK behavior. +- Automating tenant administrator approval when a tenant policy blocks PersonalAgent authorization. +- Replacing the existing Feishu WebSocket event stream runtime. +- Changing the Feishu MCP plugin settings flow. +- Replacing `/pair` with OAuth scan authorization. + +## Open Questions + +- None for implementation. Risk note: the PersonalAgent registration endpoint is inferred from Kun and official CLI/SDK behavior rather than a separately verified public Feishu documentation page.
diff --git a/docs/issues/context-overflow-auto-handoff/plan.md b/docs/issues/context-overflow-auto-handoff/plan.md new file mode 100644 index 000000000..cfa66cf7d --- /dev/null +++ b/docs/issues/context-overflow-auto-handoff/plan.md @@ -0,0 +1,82 @@ +# Context Overflow Auto-Handoff Plan + +## Approach + +Add a provider-call wrapper inside `runStreamForMessage` around the provider `coreStream`. The +wrapper will keep the existing local preflight, detect context-window failures before any provider +output is yielded to `processStream`, recover the request once, and retry with a rebuilt request view. + +## Implementation + +- Add `contextWindowError.ts` with `isContextWindowErrorLike(value: unknown): boolean` and use it + from both `process.ts` and the provider wrapper. +- Preserve local request preflight before provider calls. +- Track whether any provider event has been yielded. If the provider throws a matching context + overflow before that point, recover and retry. If the provider's first event is a matching error + event, recover and retry without yielding it. +- Keep every event after the first yielded event non-recoverable, including content, reasoning, tool + call, permission, usage, image, rate limit, stop, and later errors. +- Reuse `recoverRequestContextPressure` for auto-compaction recovery. When it returns no compaction + intent because auto compaction is disabled, keep the existing deterministic fit/max-token shrink + path and retry without summary generation. +- Re-run preflight after recovery. If the request still does not fit, throw + `buildRequestContextOverflowErrorMessage`. +- If the post-recovery retry still returns or throws a context-window error before any output, throw + a local DeepChat diagnostic instead of yielding the provider error. 
Use + `buildRequestContextOverflowErrorMessage` only when the fresh retry preflight still does not fit; + otherwise explain that the provider still reported context overflow after recovery despite + DeepChat's local estimate fitting. +- Treat local preflight recovery and provider overflow recovery as one assistant-run recovery + budget. If preflight recovery has already run, a provider overflow can only schedule one + summary-free strict trim retry; it must not run summary handoff again. +- Keep context-window matching strict enough to avoid quota, billing, and rate-limit false positives; + only generic token-limit wording may match when accompanied by request/context/input/prompt + wording. +- Keep `input exceeds` behind stronger token/context-pressure hints so unrelated input-size or upload + errors do not trigger context overflow recovery. +- Scan wrapped provider error fields by priority and stop on the first match so a long unrelated + field cannot hide a later context-window field. +- Continue scanning SDK `Error` instances after `message`, `name`, and `cause` so custom fields such + as `body` and `response.data.error.message` can trigger recovery. +- Scan array-shaped provider error payloads with a small fixed element cap. Include common + `errors` and `issues` fields while keeping existing quota, billing, rate-limit, and `429` + exclusion behavior unchanged. +- Cap strict retry's extra reserve at 8,192 tokens while preserving the existing max-token shrink. +- Persist view manifests with the actual per-attempt budget. Strict retry manifests record the + halved/capped requested max tokens and include the strict extra reserve in `reserveTokens`. +- Pass the request model id into DeepChat budget bypass detection so video-generation model-id + heuristics remain effective. +- Continue calling Memory injection after successful compaction recovery through the existing system + prompt rebuild path. 
Continue calling Memory extraction only when a compaction intent was actually + applied. + +## Compatibility + +- No schema, IPC, route, or public setting changes. +- Existing tape anchor name `auto_handoff/context_overflow` is reused. +- Existing stored messages are not deleted; recovery only changes the request view and summary cursor. +- Auto compaction disabled remains respected for summary generation, while still allowing deterministic + trim retry as a hard fallback. + +## Tests + +- Add classifier coverage for common provider messages. +- Add classifier negative coverage for quota, billing, TPM/RPM, rate-limit, and generic token-quota + failures. +- Add agent runtime coverage for first-event context overflow recovery, thrown context overflow + recovery, post-output overflow non-retry, auto-compaction-disabled trim retry, oversized local + request blocking, and ACP/image bypass. +- Add retry-failure coverage proving a second pre-output context overflow returns DeepChat local + budget guidance and does not perform a third provider call. +- Add video model-id bypass coverage for models such as `sora-*`. +- Add an agent-runtime test path that uses the real `processStream` to verify persisted assistant + errors do not contain provider raw context-window text. +- Add regression coverage for preflight recovery followed by provider overflow, proving no second + summary handoff occurs and only one strict trim retry is allowed. +- Add classifier coverage for SDK `Error` objects with nested `response`/`body` fields. +- Add classifier coverage for bounded `errors[]` / `issues[]` provider payloads. +- Add classifier coverage proving generic `input exceeds` file-size or upload-limit failures do not + match. +- Add manifest coverage for strict retry token budget fields. +- Preserve process-stream behavior for context overflow errors that are not intercepted by the + provider wrapper. 
diff --git a/docs/issues/context-overflow-auto-handoff/spec.md b/docs/issues/context-overflow-auto-handoff/spec.md new file mode 100644 index 000000000..1ba866150 --- /dev/null +++ b/docs/issues/context-overflow-auto-handoff/spec.md @@ -0,0 +1,69 @@ +# Context Overflow Auto-Handoff Spec + +## Problem + +When a provider rejects a request after DeepChat's local budget preflight, the first streamed event +can be a context-window error such as "input exceeds the context window". Today that event reaches +`processStream`, is persisted as an assistant error, and users see the provider's raw red error. + +This is especially visible when local estimates differ from a provider tokenizer or when provider +schemas/system prompts are counted differently by the upstream API. + +## Goal + +Recover from provider-side context overflow before any output is shown, using DeepChat's existing +tape, rolling summary, summary cursor, and view manifest flow. The behavior should mirror the Bub +`auto_handoff/context_overflow` pattern, where an overflow is treated as an automatic handoff point, +without adding a Bub or tape.systems dependency. + +## Acceptance Criteria + +- Provider context overflow thrown before the first stream event triggers one automatic recovery and + retry. +- Provider context overflow delivered as the first stream event triggers one automatic recovery and + retry without persisting that error event. +- Provider context overflow after any streamed content, tool call, permission request, image, usage, + or stop event does not retry. +- With auto compaction enabled, recovery creates an `auto_handoff/context_overflow` compaction anchor, + updates rolling summary state, rebuilds the system prompt, and retries. +- With auto compaction disabled, recovery does not call the summary LLM or write compaction anchors; + it only uses deterministic request trimming and max-token shrink before retrying. 
+- Retry performs a fresh preflight and never sends a request that DeepChat already knows cannot fit. +- If the retry still fails with a context-window error before any provider output, DeepChat returns + local budget guidance instead of showing the provider's raw context-window error. +- A local preflight recovery and a provider overflow recovery share the same assistant-run recovery + budget; once preflight recovery has compacted or trimmed the request, provider overflow may only + trigger a summary-free strict trim retry. +- The `auto_handoff/context_overflow` anchor is written at most once per assistant run. +- Context-window detection must not classify quota, billing, or rate-limit failures as context + overflow. +- Context-window detection scans SDK `Error` custom fields such as `body` and `response` without + losing the recursion and text-size guards. +- Context-window detection scans bounded array-shaped provider error fields such as `errors[]` and + `issues[]` without changing quota, billing, rate-limit, or `429` exclusion semantics. +- Context-window detection does not treat generic `input exceeds` failures such as file-size or + upload-limit errors as context overflow unless token/context-pressure wording is also present. +- Provider retry failure diagnostics distinguish local over-budget requests from provider tokenizer + disagreement after DeepChat already compacted or trimmed the request. +- View manifests record the actual per-attempt token budget, including strict retry max-token shrink + and strict retry extra reserve. +- Video generation models detected by model id keep bypassing DeepChat chat context budgeting. +- Large wrapped provider errors cannot hide a later context-window field behind a long unrelated + message field. +- Memory is not a trigger. Memory only affects optional system prompt injection and optional + extraction after a successful compaction. 
+- ACP, image, video, and TTS paths that bypass DeepChat's chat context budget keep their current + behavior. + +## Constraints + +- No database schema, IPC route, or public configuration field changes. +- No deletion of stored messages as part of recovery. +- Automatic recovery is limited to once per assistant run. +- Provider error matching is implemented with a shared main-process classifier. + +## Non-Goals + +- Replacing DeepChat's compaction implementation with Bub or tape.systems. +- Changing Memory storage, recall, injection, or extraction semantics. +- Exact provider tokenizer parity. diff --git a/docs/issues/context-overflow-auto-handoff/tasks.md b/docs/issues/context-overflow-auto-handoff/tasks.md new file mode 100644 index 000000000..ec30c2b89 --- /dev/null +++ b/docs/issues/context-overflow-auto-handoff/tasks.md @@ -0,0 +1,32 @@ +# Context Overflow Auto-Handoff Tasks + +- [x] Create SDD issue artifacts. +- [x] Add shared context-window error classifier. +- [x] Replace `process.ts` private classifier with the shared classifier. +- [x] Add provider first-event/first-throw recovery wrapper in `runStreamForMessage`. +- [x] Keep auto-compaction-disabled recovery summary-free and trim-only. +- [x] Add focused compaction and agent runtime tests. +- [x] Update Auto Compaction copy in all locales. +- [x] Run `pnpm run format`, `pnpm run i18n`, `pnpm run lint`, and targeted tests. +- [x] Narrow context-window classifier and add false-positive tests. +- [x] Convert post-recovery provider overflow into local budget diagnostics. +- [x] Cap strict trim retry extra reserve. +- [x] Add second-overflow retry failure tests. +- [x] Preserve video model-id context-budget bypass in provider retry wrapper. +- [x] Split retry-failure diagnostics for local over-budget vs provider tokenizer disagreement. +- [x] Scan classifier fields by priority without long-field starvation. +- [x] Add real processStream persistence coverage for provider-overflow retry failure. 
+- [x] Document run-level recovery guard repair follow-up. +- [x] Share one assistant-run recovery budget between preflight recovery and provider overflow. +- [x] Scan SDK `Error` custom fields in the context-window classifier. +- [x] Persist strict retry view manifests with the actual attempt token budget. +- [x] Add regression tests for preflight-recovery overflow, second-attempt throw, and manifest budget. +- [x] Run repair validation commands. +- [x] Document bounded array-shaped provider error polish. +- [x] Add bounded array scanning to `isContextWindowErrorLike()`. +- [x] Add array-shaped context overflow classifier tests. +- [x] Replace strict retry manifest magic-number assertions with formula-based expectations. +- [x] Run P3 polish validation commands. +- [x] Guard `input exceeds` behind token/context-pressure hints. +- [x] Clarify Bub `auto_handoff/context_overflow` reference in the spec. +- [x] Polish es-ES Auto Compaction description wording. diff --git a/docs/issues/cua-plugin-description-copy/spec.md b/docs/issues/cua-plugin-description-copy/spec.md new file mode 100644 index 000000000..4acca6948 --- /dev/null +++ b/docs/issues/cua-plugin-description-copy/spec.md @@ -0,0 +1,46 @@ +# CUA Plugin Description Copy + +## User Need + +The CUA plugin should describe what it is instead of showing `DeepChat · com.deepchat.plugins.cua`. + +## Goal + +Show localized copy meaning: DeepChat's ComputerUse plugin implemented based on `trycua/cua`. + +## Acceptance Criteria + +- CUA plugin detail subtitle uses the localized description. +- CUA plugin catalog card uses the same localized description. +- Non-CUA plugins keep their existing description behavior. +- Completed SDD folders keep only `spec.md`. + +## UI Sketch + +Before: + +```text +CUA Computer Use Runtime +DeepChat · com.deepchat.plugins.cua +``` + +After: + +```text +CUA Computer Use Runtime +DeepChat 基于 trycua/cua 项目实现的 ComputerUse 插件 +``` + +## Constraints + +- Use i18n for user-facing copy. 
+- Do not add a manifest field for one plugin. + +## Non-Goals + +- Redesigning the plugin detail header. +- Changing plugin runtime metadata. + +## Open Questions + +- None. diff --git a/docs/issues/feishu-plugin-description-copy/spec.md b/docs/issues/feishu-plugin-description-copy/spec.md new file mode 100644 index 000000000..cf3601ae4 --- /dev/null +++ b/docs/issues/feishu-plugin-description-copy/spec.md @@ -0,0 +1,47 @@ +# Feishu Plugin Description Copy + +## User Need + +The Feishu/Lark plugin should describe its actual remote-control purpose instead of showing `DeepChat · com.deepchat.plugins.feishu`. + +## Goal + +Use the existing Feishu remote-control description for the official Feishu/Lark plugin in catalog and detail views. + +## Acceptance Criteria + +- Feishu/Lark plugin catalog description uses `settings.remote.feishu.description`. +- Feishu/Lark plugin detail subtitle uses `settings.remote.feishu.description`. +- zh-CN Feishu remote description names `飞书 / Lark Bot`. +- Other non-special official plugins keep the publisher/id fallback. +- Completed SDD folders keep only `spec.md`. + +## UI Sketch + +Before: + +```text +飞书 / Lark +DeepChat · com.deepchat.plugins.feishu +``` + +After: + +```text +飞书 / Lark +接入飞书 / Lark Bot,支持私聊、群聊和会话远程控制。 +``` + +## Constraints + +- Reuse existing i18n copy instead of adding duplicate plugin-specific Feishu strings. +- Do not change plugin manifest metadata. + +## Non-Goals + +- Redesigning plugin cards. +- Changing Feishu runtime behavior. + +## Open Questions + +- None. diff --git a/docs/issues/memory-first-turn-cold-start/plan.md b/docs/issues/memory-first-turn-cold-start/plan.md new file mode 100644 index 000000000..729b009b6 --- /dev/null +++ b/docs/issues/memory-first-turn-cold-start/plan.md @@ -0,0 +1,191 @@ +# Memory First-Turn Cold Start Latency Plan + +## Strategy + +Three independent workstreams. 
P0-A guarantees first-token latency on its own; P0-B removes the +worst amplifier; P1 raises the chance the first turn also gets semantic recall. Ship P0-A and P0-B +together; P1 can follow. + +## P0-A — Hot-path FTS zero-wait (primary, `memoryPresenter`) + +Move the DuckDB cold open and the query embedding off the pre-first-token path. Only take the +awaited vector path when the agent's store is already warm; otherwise answer this turn from FTS +and warm in the background. + +- **Add a `getDimensions` dep** to `MemoryPresenterDeps` (`types.ts:251-274`, today only + `getEmbeddings`) and wire it to `llmproviderPresenter.getDimensions` (`presenter/index.ts:575`, + alongside the existing `getEmbeddings` wiring; `llmProviderPresenter` already exposes it at + `index.ts:823-827`). This is what lets warm resolve the embedding dimension **without** a query + embedding — otherwise an implementer will reach for `getEmbeddings([query])` to learn the dim and + drag the cold-start cost back onto the hot path. +- Track readiness explicitly with the **full cache-key identity**. Add + `vectorStoreReady: Map` whose value is exactly + `vectorStoreCacheKey(agentId, embedding, dim)` = `agentId::providerId::modelId::dim` + (`index.ts:2377-2383`) — never key on `agentId` alone, or a model/dim switch would read as warm. + Set it **only** after `openVectorStoreLocked` resolves to a store whose `isUsable()` is true + (`index.ts:2424-2444`); the existing `vectorStoreIdentities` is set when the open *starts*, so it + cannot gate readiness, and the unusable-store branch (`index.ts:1703-1711`) must leave ready unset. 
+- Clear `vectorStoreReady` everywhere the store cache is evicted, so a stale/closed store is never + treated as warm: in `closeVectorStore` (`index.ts:2408-2409`, the single eviction chokepoint for + reset paths 611/2222/2244 and the identity-change reopen at 2435), in `dispose`'s + `.clear()` block (`index.ts:2343-2344`), and in the `openVectorStoreLocked` open-failure catch + (`index.ts:2437-2438`). +- In `retrieve` (`index.ts:1640-1717`), before the vector block, do a **synchronous** warm check + (`isVectorStoreWarm(agentId, embedding)` comparing against the full identity): + - **Warm:** keep today's path — `getEmbeddings([query])` → `getVectorStore` (now a cache hit) → + `store.query` → `fuse`. + - **Cold:** skip the query embedding **and** the store open this turn; `void warmVectorStore(...)` + in the background; fall through with empty `vecMatches` so `fuse` returns FTS-only. +- `warmVectorStore(agentId, embedding)`: resolve `dim` only from a stored `embedding_dim` on a row + whose `embedding_model === embeddingFingerprint(providerId, modelId)` (`index.ts:114`, the same + current-identity match `hasStaleEmbeddings` uses, `index.ts:2367-2375`); otherwise fall back to + the new `getDimensions` dep — **never** a query embedding, and **never** a stale-fingerprint row + (that would open the old store under the old dim and mark stale vectors warm). Then + `void getVectorStore(agentId, embedding, dim)`. Coalesce concurrent callers (reuse the in-flight + open promise; do not stack opens), but keep it **re-runnable across turns** (not one-shot) so it + self-heals after a reindex. 
+- **Preserve the reindex triggers the cold path would otherwise skip.** Because cold turns skip the + query embedding (and thus `hasStaleEmbeddings` at `index.ts:1653`) and the in-line unusable-store + branch (`index.ts:1703-1711`), `warmVectorStore` must reproduce them after the open resolves: + - store **not** `isUsable()` → `void reindexEmbeddings(agentId, true)` (mirrors `index.ts:1708`); + do **not** set `vectorStoreReady`. + - store usable but `hasStaleEmbeddings(agentId, dim, currentFingerprint)` → `void + reindexEmbeddings(agentId)` (mirrors `index.ts:1659`); do **not** set `vectorStoreReady`. + - store usable and not stale → set `vectorStoreReady`. + Since `isVectorStoreWarm` stays false until ready is set, each cold turn re-schedules warm, so once + the background reindex finishes the next warm finds a usable, non-stale store and marks it warm — + the agent self-heals instead of being stuck FTS-only. +- Keep the existing background `backfillEmbeddings`/`reindexEmbeddings` triggers on the warm path + intact — they already run via `void` behind the per-agent lock; warm simply front-runs the + `getVectorStore` that `retrieve` used to await. +- Preserve all `canReadAgentMemory`/teardown guards already in `retrieve`. + +Net effect: a cold first turn does FTS-only memory injection (fast, no network embed, no DuckDB +open); turn 2+ (or any turn after warm resolves) restores full hybrid recall. + +## P0-B — Ship VSS on all platforms + smoke (build/runtime) + +- Make `scripts/installVss.js` **platform/arch-aware**: remove the macOS early `return` and accept + explicit `--platform`/`--arch` flags that select the **target** triple (e.g. `osx_arm64` vs + `osx_amd64`), version-locked to the current `@duckdb/node-api` package version. Do **not** rely on the host + machine's architecture — a CI x64 box cross-building `build:mac:arm64` would otherwise bundle the + wrong extension. 
(DuckDB `INSTALL` resolves the host platform by default; the script must fetch/ + copy the requested target's binary, mirroring how `installRuntime:*:<arch>` already takes `-a`.) +- Wire the arch-specific install into each build target so `runtime/duckdb/extensions/` is populated + with the **matching** binary before `electron-builder` packages `./runtime/` + (`electron-builder.yml:41-43`): `build:mac:arm64` → `--platform darwin --arch arm64`, + `build:mac:x64` → `--platform darwin --arch x64`, and the corresponding win/linux targets. Do not blanket-insert the + default `installRuntime:duckdb:vss` (host-arch) into all builds. +- Load bundled VSS by explicit path in `memoryVectorStore.loadVss()`. In packaged builds, a missing + or invalid bundled extension fails closed so the caller falls back to FTS; network `INSTALL vss` + remains a dev/test-only fallback with explicit logging. +- On macOS, write the packaged `vss.duckdb_extension` as a base64(gzip) data asset during + `afterPack` and delete the raw Mach-O before codesign/notarization. Runtime decodes and + materializes the asset into `app.getPath('userData')` and loads that copy, preserving DuckDB's + required footer without putting a recognizable executable or gzip archive inside the notarized + `.app`. Materialization is async and process-coalesced by packaged asset path plus userData root so + multiple agents share one read/hash/inflate pass per process. +- Extend `scripts/smoke-duckdb-vss.js` to assert the extension loads from either the bundled raw + path (`LOAD '<path>'`) or a packaged base64 asset (`--extension-base64-path`, materialized to a temp file) + without a network `INSTALL`, and run it in CI / build preflight. + +## P1 — Background prewarm (best-effort, `memoryPresenter` + lifecycle hook) + +- Add `warmActiveAgents()` that, for each enabled managed agent, `void warmVectorStore(...)` and + optionally issues one tiny `getEmbeddings` to warm the provider connection / load a local + embedding model.
+- Trigger earlier than maintenance: a short dedicated startup pass (a few seconds after start) and/or + on chat session/window open — decoupled from `MAINTENANCE_START_DELAY_MS = 60s` (`index.ts:109`). + Reuse the staggering shape of `armActiveAgentsStaggered` (`index.ts:296-307`). +- Strictly best-effort: P0-A still guarantees the hot path when the user out-races the warm. + +## Follow-up — VSS reliability and cold-store coverage + +- Harden `scripts/installVss.js` without changing its public CLI: export import-safe helpers for + parsing, target resolution, retrying downloads, and validating extension metadata; run the CLI only + behind an `import.meta.url` main guard. +- Retry VSS downloads up to three times for network failures, HTTP 408/429, and 5xx. Do not retry + permanent 4xx responses; include DuckDB extension version, target triple, and URL in the failure so + a version/triple mismatch is diagnosable from CI logs. +- Validate the gunzipped extension footer before writing it into `runtime/duckdb/extensions/`: the + last 64 KiB must contain `duckdb_signature`, the expected DuckDB extension version, and the expected + target triple. +- Change the macOS build matrix so x64 uses an Intel runner (`macos-15-intel`) and arm64 uses + `macos-15`. Keep the existing install + smoke step before `electron-builder`; do not remove the + package-level build-script install hooks. +- Document the deliberate cold-store FTS-only behavior for secondary callers (`coordinateWrite`, + `searchMemories`) and add focused tests proving they do not reintroduce cold DuckDB/embedding + awaits. Keep `mergeNearDuplicates` behavior unchanged in this follow-up. +- Drain `embeddingWarmups` in both global dispose and deleted-agent cleanup, matching + `vectorStoreWarmups`. 
+ +## Remaining low-risk follow-up — Release parity and runtime polish + +- Bring `.github/workflows/release.yml` to parity with `build.yml`: run target-arch VSS install and + `smoke:duckdb:vss` before each `electron-builder` call, and use an Intel runner for macOS x64. +- Fail packaged VSS load closed: a packaged app never performs network `INSTALL vss`; dev/test keeps + the fallback. Build and release jobs also smoke the packaged `app.asar.unpacked/runtime` copy after + `electron-builder` so CI proves the shipped asset exists and loads. +- In cold `retrieve`, keep returning FTS-only immediately but also start `warmEmbeddingConnection` + alongside `warmVectorStore`; both remain fire-and-forget and coalesced. +- Add a 30s cooldown for failed warm dimension resolution when no current embedded row provides a + dimension. The cooldown is keyed by `agentId::providerId::modelId`; it is cleared after a successful + dimension resolution and after agent cleanup/dispose. +- Extend `MemoryRepositoryPort` with targeted current-dimension and stale-existence queries, backed + by SQL `LIMIT 1` / `EXISTS` in `AgentMemoryTable`, and use them instead of scanning every embedded + row during warm and drain ready checks. +- Let offline duplicate consolidation best-effort await `warmVectorStore` before scanning neighbors; + failures still fall through to the existing FTS/no-op behavior. +- Rename the current-embedding guard to reflect that it also checks readability/teardown state, and + document the 3-part warm key vs 4-part vector cache key prefix comparison. + +## Compatibility + +- No schema/IPC/event changes. Pure main-process timing + build-packaging change. +- Existing on-disk `.duckdb` sidecars and embedding identities are untouched; the warm path opens + the same files via the same `openVectorStoreLocked`. +- Memory-disabled / no-embedding behavior is unchanged. 
+ +## Risks + +- **Silent FTS-only forever** if a bundled extension fails to `LOAD` by path or macOS base64 asset + materialization fails: mitigate by logging and covering both raw and base64 load paths in smoke + checks. +- **Warm coalescing**: ensure `warmVectorStore` reuses the in-flight open and never opens a second + DuckDBInstance for the same file (it routes through `getVectorStore` → per-agent lock, which + already serializes — verify). +- **First-turn recall quality**: FTS-only on a cold turn is a deliberate, documented trade-off. +- **Build-time extension download dependency**: transient outages can fail releases. Retry transient + failures, but fail permanent 4xx immediately to surface unsupported DuckDB versions or target + triples. +- **Wrong-arch bundled extension**: cross-arch smoke only checks presence. Validate extension metadata + offline and run macOS x64 smoke on an Intel runner so the most likely missed target is load-tested. +- **Release/package drift**: release jobs call `electron-builder` directly, so package-level build + script hooks are not enough. Cover release workflow wiring with tests. +- **Dimension cooldown**: suppresses repeated `getDimensions` calls during outages; FTS recall remains + available and the cooldown retries automatically. + +## Test Strategy + +- `test/main` — `retrieve` cold path: with a deliberately slow `createVectorStore`, the first + `retrieve` resolves quickly, returns FTS-only, **does not call `getEmbeddings([query])`**, awaits + no store open, and schedules exactly one background warm; mark store ready and assert the next + `retrieve` uses the vector path. +- `test/main` — `warmVectorStore` opens the store once, sets readiness, and coalesces concurrent + calls; no second instance for the same agent. 
+- `test/main` — embedding-model switch self-heal: against an unusable / identity-mismatched store, + a cold `retrieve` does not block, schedules `reindexEmbeddings(force)`, and leaves the store + un-warmed (`vectorStoreReady` unset); after the reindex completes, a later `retrieve` restores the + vector path — i.e. no permanent FTS-only. +- `test/main` — memory-disabled / no-embedding: unchanged (no warm scheduled, FTS path as today). +- Build/smoke — `smoke:duckdb:vss` loads the bundled extension by path on Windows/Linux and loads + the packaged base64 asset on macOS without network install. +- Scripts — unit-test `installVss` helper parsing, retry classification, no-retry 404 failures, and + extension footer validation for valid, wrong-version, wrong-triple, and missing-signature cases. +- Memory — unit-test cold `searchMemories`, cold exact-provenance duplicate writes, cold semantic + neighbor writes, and embedding warmup drain in dispose/deleted-agent cleanup. +- Workflows — assert both build and release workflows install and smoke VSS before packaging, and mac + x64 release runs on `macos-15-intel`. +- Repository/runtime — unit-test targeted current-dimension and stale-existence queries, dimension + failure cooldown/retry, cold embedding prewarm coalescing, and consolidation warm-before-merge. +- Quality gates — `pnpm run format && pnpm run i18n && pnpm run lint && pnpm run typecheck`. diff --git a/docs/issues/memory-first-turn-cold-start/spec.md b/docs/issues/memory-first-turn-cold-start/spec.md new file mode 100644 index 000000000..8aa954fc9 --- /dev/null +++ b/docs/issues/memory-first-turn-cold-start/spec.md @@ -0,0 +1,130 @@ +# Memory First-Turn Cold Start Latency + +## Problem + +With ~100–200 stored memories, the **first chat turn after app start** is very slow: the +user's message is sent but no token streams back for a long time, even though the network is +healthy. Subsequent turns are fine. The stall also reproduces when an attachment is added. 
+ +## Root Cause + +Memory injection is `await`ed before the first token streams +(`agentRuntimePresenter/index.ts:1005-1014` → `appendMemoryInjection` → `buildInjection` → +`retrieve`). On the first recall of the process, `retrieve` awaits opening the per-agent DuckDB +(VSS) vector store (`memoryPresenter/index.ts:1664` → `memoryVectorStore.ts:62-129`): +`DuckDBInstance.create` + `LOAD/INSTALL vss` + `SET hnsw_enable_experimental_persistence` + +materializing the persisted HNSW index. The opened store is cached in `this.vectorStores` +(`index.ts:2430-2432`), so the cold cost is paid once per process — which is exactly why only +the first turn is slow. + +Two amplifiers make the cold open severe rather than merely noticeable: + +1. **VSS extension is not bundled, so the first open hits a network `INSTALL vss`.** + `scripts/installVss.js:11-14` returns early on macOS (`Skipping DuckDB extension + installation on macOS`); `installRuntime` (package.json:65) and the `build:*` scripts never + run `installRuntime:duckdb:vss` (package.json:72); `electron-builder.yml:41-43` only copies + `./runtime/` as-is. So `memoryVectorStore.ts:72-74` falls back to `INSTALL vss; LOAD vss;`, + downloading the extension binary on the first recall. A failed/slow `LOAD` by path then + degrades silently to FTS via `retrieve`'s catch (`index.ts:1714`). +2. The persisted HNSW index load grows with corpus size; combined with users only stabilizing + embedding-recall usage after accumulating memories, this produces the perceived + "starts after 100–200 memories" correlation. (Note: the vector path is attempted whenever + `memoryEmbedding` is configured and the query is non-empty, regardless of row count — + `index.ts:1642-1647` — so row count is an amplifier, not a gate.) + +The attachment scenario shares the **same** memory-injection bottleneck (the recall query is +`normalizedInput.text` only, `index.ts:1011`; attachments never enter the memory query or +vector store). 
The attachment adds a separate, smaller `context-build` cost: prompt +concatenation, token estimation, a larger request body, and a synchronous `fs.readFileSync` +for audio (`contextBuilder.ts:314`). Fixing memory injection improves both scenarios. + +## Goal + +The first chat turn after app start must stream its first token without blocking on a DuckDB/VSS +cold open or a network `INSTALL vss`, regardless of memory count or attachments, while keeping +recall useful on that turn and restoring full hybrid (FTS + vector) recall on later turns. + +## Acceptance Criteria + +- **Hot path never blocks on a cold store open.** On a turn where the agent's vector store is + not yet warm, `retrieve` returns FTS-based recall without awaiting `getVectorStore`/the query + embedding, and schedules a background warm. First-token latency is bounded even when + `createVectorStore` is artificially slow. +- **Graceful degrade, not empty.** A cold first turn still returns keyword (FTS) recalled + memories; persona and working-memory injection are unaffected. +- **Full recall recovers.** After the background warm completes (or on any later turn with a + warm store), `retrieve` uses the full FTS + vector hybrid path as today. +- **Embedding-model switch still self-heals (no reindex regression).** Skipping the query + embedding on a cold turn must not skip the existing reindex triggers. After the user changes the + embedding model/dimension, a cold `retrieve` does not block; the background warm detects the + unusable / identity-mismatched store and fires `reindexEmbeddings(force)` exactly as the online + path does today (`index.ts:1659/1708`), without marking the store warm; once reindex completes, + later turns restore vector recall. No path can leave an agent permanently FTS-only. +- **VSS shipped on all platforms incl. 
macOS.** Packaged builds contain the matching-version VSS + extension under `runtime/duckdb/extensions/`; macOS packages store it as a base64(gzip) data asset + and materialize it into `userData` before `LOAD`, while Windows/Linux packages load the raw bundled + extension directly. No packaged path performs network `INSTALL vss`. +- **Background prewarm reduces cold turns.** After startup (and/or on session/window open), + enabled agents' vector stores and embedding connections are warmed off the hot path, decoupled + from the 60s maintenance delay — but correctness never depends on warm completing first. +- **No behavior change** when memory is disabled or no embedding model is configured. +- **Both reported scenarios improve**; the attachment's residual cost is limited to + `context-build`, with `memory-injection` no longer dominating the first turn. +- **VSS packaging is reliable and verifiable.** Build-time VSS downloads retry transient network + failures, fail fast on permanent 4xx errors with the DuckDB version and target triple in the + message, and validate the downloaded extension footer before it can be bundled. +- **macOS x64 VSS is truly smoke-tested.** CI must run the macOS x64 VSS smoke on an Intel runner + so that `LOAD '<path>'` executes instead of only checking that the file exists. +- **Cold-store secondary callers are explicit.** Memory writes, maintenance duplicate scans, and + management search intentionally inherit the cold FTS-only behavior from `retrieve`; tests cover the + user-visible write/search cases so this does not remain an undocumented semantic change. +- **Embedding prewarm teardown is drained.** `dispose()` and deleted-agent cleanup wait for in-flight + embedding warmups just like vector warmups, keeping background-task teardown invariants consistent.
+- **Release packaging matches CI packaging.** Release workflows must install and smoke-test the + matching VSS extension before packaging, and macOS x64 release packaging must run on an Intel + runner so the smoke performs a real `LOAD`. +- **Cold turns also prewarm embedding connections.** A cold vector store still returns FTS-only + immediately, but it also kicks a best-effort embedding connection warm in the background so the + next vector turn is less likely to pay provider cold-start cost. +- **Repeated warm dimension failures are throttled.** If an agent has no stored current embedding + dimension and `getDimensions` fails, subsequent cold turns suppress repeated dimension calls for a + short cooldown while continuing to answer from FTS. +- **Warm metadata checks are targeted.** Warm/reindex decisions use repository-level dimension and + stale-existence queries instead of materializing every embedded row. +- **Packaged VSS failures never download at runtime.** In packaged builds, a missing or unloadable + bundled VSS extension fails the vector store open and recall degrades to FTS; network `INSTALL vss` + is allowed only in dev/test paths. + +## Non-Goals + +- Do not redesign the vector store engine, switch vector DB, or change the HNSW parameters. +- Do not change memory extraction, consolidation, forgetting, or persona logic. +- Do not change which memories are semantically recalled — only **when** the vector path engages + on a cold first turn. +- Do not optimize attachment `context-build` (audio sync read, token estimation) here; track as a + follow-up if profiling shows it material after the memory fix. + +## Decisions + +- **macOS uses a base64(gzip) packaged VSS data asset.** DuckDB's macOS extension requires a footer + (`duckdb_signature` metadata) that makes the Mach-O fail Apple `codesign --strict` + validation. The footer cannot be removed because DuckDB then refuses to load the extension. 
A raw + gzip asset also fails notarization because notarytool recursively expands known archives and scans + the contained Mach-O. Decision: install and smoke-test macOS VSS before packaging, then write a + base64(gzip) data asset during `afterPack` and remove the raw Mach-O from the `.app`; at runtime a + packaged app decodes the asset, materializes the original extension into `userData`, and loads that + file. If materialization or load fails, vector store open fails closed and recall stays on FTS. + Network `INSTALL vss` remains a dev/test fallback only. +- **The VSS install script becomes platform/arch-aware** (`--platform`/`--arch`) and is invoked by + each build target with its matching target triple; cross-builds must never bundle the host + machine's architecture. See plan P0-B. +- **No checksum pin in this follow-up.** The build-time guard is DuckDB extension footer validation + (`duckdb_signature`, version, and target triple) plus real load-by-path smoke. `--repository` and + `DUCKDB_EXTENSION_REPOSITORY` remain available for mirrors and CI. +- **Release workflow parity is required.** Package-level `build:*` VSS install hooks protect local + and manual builds, but release jobs call `electron-builder` directly and must explicitly run + install/smoke before packaging. + +## Open Questions + +None. diff --git a/docs/issues/memory-first-turn-cold-start/tasks.md b/docs/issues/memory-first-turn-cold-start/tasks.md new file mode 100644 index 000000000..31b986589 --- /dev/null +++ b/docs/issues/memory-first-turn-cold-start/tasks.md @@ -0,0 +1,77 @@ +# Memory First-Turn Cold Start Latency Tasks + +## P0-A — Hot-path FTS zero-wait +- [x] Add a `getDimensions` port to `MemoryPresenterDeps` (`types.ts:251-274`) and wire it to `llmproviderPresenter.getDimensions` (`presenter/index.ts:575`). 
+- [x] Add `vectorStoreReady: Map` keyed by the full `vectorStoreCacheKey` identity (`agentId::providerId::modelId::dim`); set it **only** when `openVectorStoreLocked` resolves to an `isUsable()` store; leave unset on the unusable branch (`index.ts:1703-1711`). +- [x] Clear `vectorStoreReady` wherever the cache is evicted: `closeVectorStore` (`2408-2409`), `dispose` (`2343-2344`), and the `openVectorStoreLocked` open-failure catch (`2437-2438`). +- [x] Add `isVectorStoreWarm(agentId, embedding)` (sync, full-identity compare) and `warmVectorStore(agentId, embedding)` (background, coalesced but re-runnable across turns; dim only from a stored `embedding_dim` row whose `embedding_model === embeddingFingerprint(providerId, modelId)` (`index.ts:114`), else `getDimensions` — **never** a query embedding or a stale-fingerprint row). +- [x] In `warmVectorStore`, reproduce the reindex triggers the cold path skips: store not `isUsable()` → `void reindexEmbeddings(agentId, true)` (`index.ts:1708`), no ready; usable but `hasStaleEmbeddings(...)` → `void reindexEmbeddings(agentId)` (`index.ts:1659`), no ready; usable + not stale → set ready. +- [x] Gate the vector block in `retrieve`: cold → FTS-only this turn + `void warmVectorStore`; warm → existing hybrid path. Preserve teardown guards and background backfill/reindex triggers. +- [x] Tests: cold `retrieve` resolves fast, returns FTS-only, **does not call `getEmbeddings([query])`**, awaits no store open, and schedules exactly one warm; warm `retrieve` uses the vector path; memory-disabled / no-embedding unchanged. +- [x] Test — embedding-model switch: with an unusable/identity-mismatched store, cold `retrieve` does not block, schedules `reindexEmbeddings(force)`, and leaves the store un-warmed; after reindex completes, a later `retrieve` restores the vector path (no permanent FTS-only). 
+ +## P0-B — Ship VSS on all platforms + smoke +- [x] Make `scripts/installVss.js` platform/arch-aware: remove the macOS early `return`; accept `--platform`/`--arch` selecting the **target** triple (not host arch), version-locked to `@duckdb/node-api`. +- [x] Invoke the arch-matching install from each build target (`build:mac:arm64` → `--platform darwin --arch arm64`, `build:mac:x64` → `--arch x64`, win/linux equivalents) so `runtime/duckdb/extensions/` is bundled by `electron-builder` (`electron-builder.yml:41-43`). Do not blanket-add host-arch `installRuntime:duckdb:vss` to all builds. +- [x] `memoryVectorStore.loadVss()`: load the bundled extension by explicit path; packaged builds fail closed to FTS when the bundled extension is missing or invalid, while dev/test keeps the logged network `INSTALL vss` fallback. +- [x] Extend `scripts/smoke-duckdb-vss.js` to assert the extension loads via `LOAD '<path>'` from the bundled path with no network `INSTALL`; run in CI / build preflight. + +## P1 — Background prewarm +- [x] Add `warmActiveAgents()` (vector store + tiny embedding warm) for enabled managed agents. +- [x] Trigger an early startup warm pass and/or session/window-open warm, decoupled from `MAINTENANCE_START_DELAY_MS`; reuse `armActiveAgentsStaggered` staggering. +- [x] Test: warm pass opens each enabled agent's store once off the hot path; best-effort, no correctness dependency. + +## Follow-up — Verified VSS and cold-store coverage +- [x] `scripts/installVss.js`: export import-safe helpers and guard CLI execution so tests can import parsing/download/validation logic without installing. +- [x] Add retry with exponential backoff for network errors, HTTP 408/429, and 5xx; fail permanent 4xx without retry and include version + triple + URL in the error. +- [x] Validate the downloaded extension footer (`duckdb_signature`, expected DuckDB version, expected target triple) before moving it into `runtime/duckdb/extensions/`.
+- [x] Update macOS build workflow so x64 VSS smoke runs on `macos-15-intel` and arm64 runs on `macos-15`. +- [x] Add comments/tests for deliberate cold-store FTS-only behavior in `coordinateWrite` and `searchMemories`. +- [x] Drain `embeddingWarmups` during `dispose()` and `cleanupDeletedAgentResources()`. +- [x] Add `test/main/scripts/installVss.test.ts` for retry/no-retry, metadata validation, and argument parsing. +- [x] Add memory presenter tests for cold `searchMemories`, cold exact duplicate writes, cold semantic-neighbor writes, and embedding-warmup teardown. + +## Remaining follow-up — Release parity and runtime polish +- [x] Update `.github/workflows/release.yml` to install and smoke-test target VSS extensions before each `electron-builder` call. +- [x] Change macOS release x64 packaging to run on `macos-15-intel`; keep arm64 on `macos-15`. +- [x] Update workflow tests to cover release/build VSS install+smoke and mac Intel runner wiring. +- [x] Cold `retrieve` also starts `warmEmbeddingConnection` without awaiting it. +- [x] Add 30s cooldown for failed warm dimension resolution keyed by `agentId::providerId::modelId`. +- [x] Add repository-level current-dimension and stale-embedding queries and use them in warm/drain ready checks. +- [x] Best-effort await `warmVectorStore` at the start of offline `mergeNearDuplicates`. +- [x] Rename the current-embedding guard and document warm-key/cache-key prefix matching. +- [x] Replace fixed-cycle background memory test flushing with a poll-until-condition helper where waiting for warm/reindex state matters. +- [x] Add/adjust tests for cold embedding prewarm, dimension cooldown, targeted repository queries, and consolidation warm-before-merge. + +## CodeRabbit follow-up — Packaged VSS and guardrails +- [x] Document packaged VSS fail-closed behavior; network `INSTALL vss` is dev/test only. +- [x] Packaged `MemoryVectorStore` load fails closed instead of falling back to network install. 
+- [x] Track prewarm timers per agent and clear pending deleted-agent prewarm callbacks. +- [x] Make current embedding dimension lookup deterministic in SQL and fake repository. +- [x] Add per-attempt VSS download timeout and fail-fast smoke CLI parsing. +- [x] Add post-package VSS smoke checks to build and release workflows. +- [x] Tighten workflow runner assertions and targeted dimension fixtures/tests. +- [x] Close partially opened DuckDB handles when `MemoryVectorStore.create()` fails during open/init. +- [x] Cover same-`created_at` current-dimension tie-breaks in SQL and fake repository tests. +- [x] Align P0-B plan wording and Linux packaged VSS smoke shell configuration. + +## macOS packaging follow-up — DuckDB VSS codesign compatibility +- [x] Document that macOS DuckDB VSS requires a footer that fails Apple strict codesign when shipped as a raw Mach-O. +- [x] Document that notarytool recursively scans raw gzip assets and rejects the contained unsigned DuckDB Mach-O. +- [x] Encode the macOS packaged VSS extension as base64(gzip) during `afterPack` and delete the raw extension before codesign/notarization. +- [x] Materialize packaged macOS VSS base64 assets into `userData` before runtime `LOAD`. +- [x] Extend smoke checks and macOS workflows to validate the packaged base64 path. +- [x] Add afterPack, smoke, runtime, and workflow tests for the base64 materialization path. +- [x] Make macOS base64 materialization async and process-coalesced to avoid repeated read/hash/inflate work. +- [x] Re-materialize packaged macOS VSS if a cached `userData` extension path is deleted mid-process. +- [x] Clean up smoke-test temp directories when base64/gzip materialization fails. +- [x] Keep smoke-test temp cleanup best-effort so cleanup errors do not mask original failures. + +## Validation +- [ ] Manual: with 100–200 memories, cold-start the app and confirm the first normal text turn streams promptly; repeat with an attachment. 
Compare `logSlowPreStreamStep('memory-injection')` on the first and later turns and confirm memory injection is no longer the dominant pre-stream step; attachment overhead, if any, should show under `context-build`. +- [x] `pnpm run format && pnpm run i18n && pnpm run lint && pnpm run typecheck`. +- [x] Targeted `test/main` memory suites + `pnpm run smoke:duckdb:vss`. +- [x] Follow-up targeted tests: `test/main/scripts/installVss.test.ts`, `test/main/presenter/memoryPresenter.test.ts`, and `test/main/presenter/pluginPresenter.test.ts`. +- [x] Remaining follow-up targeted tests: `test/main/presenter/memoryPresenter.test.ts`, `test/main/presenter/pluginPresenter.test.ts`, and `test/main/scripts/installVss.test.ts`. +- [x] CodeRabbit follow-up targeted tests: `test/main/presenter/memoryPresenter.test.ts`, `test/main/presenter/memoryVectorStore.test.ts`, `test/main/presenter/pluginPresenter.test.ts`, `test/main/scripts/installVss.test.ts`, and `test/main/presenter/agentMemoryTable.test.ts`. +- [ ] Full `pnpm test -- --run` is not green because of existing renderer failures unrelated to this issue: `ChatTabView.test.ts`, `MemoryConfigPanel.test.ts`, and `NewThreadPage.test.ts`. diff --git a/docs/issues/model-list-fetch-404/plan.md b/docs/issues/model-list-fetch-404/plan.md new file mode 100644 index 000000000..5be1250a8 --- /dev/null +++ b/docs/issues/model-list-fetch-404/plan.md @@ -0,0 +1,19 @@ +# Plan + +## Cause + +`BaseLLMProvider.fetchModels()` wraps `this.fetchProviderModels().then(...)` in a synchronous `try/catch`. If `fetchProviderModels()` rejects asynchronously (for example `AiSdkProvider.requestProviderJson()` throws `ProviderHttpError` after fetch returns a 404), the rejection bypasses the catch block and propagates to `ModelManager.getModelList()` and the `models:list` route. + +## Implementation + +- Convert `BaseLLMProvider.fetchModels()` to `async`/`await` so both synchronous throws and asynchronous rejections are caught by the existing error handling. 
+- Preserve `suppressErrors` semantics: + - default `true` returns `[]` on failure; + - `false` rethrows. +- Keep model validation and `configPresenter.setProviderModels()` behavior unchanged. + +## Test strategy + +- Add a regression test with a provider whose `fetchProviderModels()` rejects asynchronously. +- Assert default `fetchModels()` returns `[]` and does not throw. +- Assert `fetchModels({ suppressErrors: false })` still rejects. diff --git a/docs/issues/model-list-fetch-404/spec.md b/docs/issues/model-list-fetch-404/spec.md new file mode 100644 index 000000000..7c4a6edd6 --- /dev/null +++ b/docs/issues/model-list-fetch-404/spec.md @@ -0,0 +1,30 @@ +# Model List Fetch 404 Handling + +## User need + +When a provider model-list endpoint returns HTTP 404 (for example a provider/base URL that does not expose `/models`), the app should not surface an unhandled `deepchat:route:invoke` error for routine model-list refreshes. + +## Goal + +Keep runtime model-list route behavior resilient: default model fetches should honor the existing `suppressErrors` behavior and return an empty/cached-safe list instead of rejecting the IPC route. + +## Acceptance criteria + +- A rejected asynchronous provider model fetch is caught by `BaseLLMProvider.fetchModels()` when `suppressErrors` is true. +- `refreshModels()` / explicit non-suppressed fetches still propagate provider HTTP errors. +- The fix does not change provider authentication, request headers, or endpoint construction. +- A regression test covers asynchronous rejection from `fetchProviderModels()`. + +## Constraints + +- Do not log or expose provider API keys or credentials. +- Keep the change minimal and aligned with existing presenter/provider boundaries. + +## Non-goals + +- Changing provider base URLs or model endpoint formats. +- Masking errors for explicit force refresh paths that intentionally request non-suppressed behavior. + +## Open questions + +None. 
diff --git a/docs/issues/model-list-fetch-404/tasks.md b/docs/issues/model-list-fetch-404/tasks.md new file mode 100644 index 000000000..3a5317254 --- /dev/null +++ b/docs/issues/model-list-fetch-404/tasks.md @@ -0,0 +1,10 @@ +# Tasks + +- [x] Map stack trace to source and identify failure path. +- [x] Document issue/spec/plan. +- [x] Change `BaseLLMProvider.fetchModels()` catch behavior to handle async rejections. +- [x] Add regression tests for suppressed and non-suppressed async fetch failures. +- [x] Run targeted provider tests and relevant typecheck/lint checks. + +- [x] Address review feedback so only provider fetch failures are suppressed; validation/persistence failures now surface. +- [x] Add regression coverage for provider model persistence failures. diff --git a/docs/issues/plugin-hub-available-title/spec.md b/docs/issues/plugin-hub-available-title/spec.md new file mode 100644 index 000000000..37d71b211 --- /dev/null +++ b/docs/issues/plugin-hub-available-title/spec.md @@ -0,0 +1,50 @@ +# Plugin Hub Available Title + +## User Need + +The plugin hub should not show Official, Workspace, and Personal tabs before those plugin categories are supported. + +## Goal + +Replace the unused category tabs with a single `可用插件` heading. + +## Acceptance Criteria + +- The plugin catalog section shows a localized "Available plugins" heading. +- Official, Workspace, and Personal filter tabs are not rendered. +- Plugin search still filters the available catalog items. +- Completed SDD folders keep only `spec.md`. + +## UI Sketch + +Before: + +```text +[DeepChat 官方] [工作区] [个人] ++--------------------+ +--------------------+ +| CUA | | 飞书 / Lark | ++--------------------+ +--------------------+ +``` + +After: + +```text +可用插件 ++--------------------+ +--------------------+ +| CUA | | 飞书 / Lark | ++--------------------+ +--------------------+ +``` + +## Constraints + +- Keep this renderer-only. +- Do not add workspace/personal plugin category behavior. 
+ +## Non-Goals + +- Redesigning plugin cards. +- Adding plugin source classification. + +## Open Questions + +- None. diff --git a/docs/issues/plugin-hub-single-list/spec.md b/docs/issues/plugin-hub-single-list/spec.md new file mode 100644 index 000000000..c901695b4 --- /dev/null +++ b/docs/issues/plugin-hub-single-list/spec.md @@ -0,0 +1,58 @@ +# Plugin Hub Single List + +## User Need + +The plugin hub should avoid a separate Added section and search field while the plugin catalog is small. + +## Goal + +Show one available plugin list, with enabled plugins first, clear action labels, and highlighted enabled state. + +## Acceptance Criteria + +- The standalone Added section is removed. +- The search input is removed. +- Enabled plugins sort before disabled plugins. +- Enabled plugin action button says `管理`. +- Disabled plugin action button says `添加`. +- Enabled status badges use a highlighted color. +- Completed SDD folders keep only `spec.md`. + +## UI Sketch + +Before: + +```text +[Search plugins] [Refresh] + +已添加 +[CUA icon] + +可用插件 +[CUA] [管理] [已启用] +[Telegram] [管理] [未启用] +``` + +After: + +```text +插件 [Refresh] + +可用插件 +[CUA] [管理] [已启用 highlighted] +[Telegram] [添加] [已停用] +``` + +## Constraints + +- Keep the change renderer-only. +- Do not add new plugin category support. + +## Non-Goals + +- Redesigning plugin cards. +- Adding search back behind a feature flag. + +## Open Questions + +- None. diff --git a/docs/issues/plugin-remote-icon-consistency/spec.md b/docs/issues/plugin-remote-icon-consistency/spec.md new file mode 100644 index 000000000..d2c44bef5 --- /dev/null +++ b/docs/issues/plugin-remote-icon-consistency/spec.md @@ -0,0 +1,30 @@ +# Plugin Remote Detail Consistency + +## User Need + +Remote-control plugins should keep the same icon and localized title treatment when moving between the plugin list and detail page. 
+ +## Goal + +Make remote virtual plugins and the official Feishu/Lark plugin use the same icon, color, and localized title shown by remote channel metadata. + +## Acceptance Criteria + +- Feishu/Lark catalog and detail both show the message-circle icon with the blue remote color. +- Remote virtual plugin details keep their catalog icon color. +- In Chinese, Feishu/Lark keeps the localized `飞书 / Lark` title instead of flashing to the plugin manifest name. +- Non-remote official plugins still use the generic puzzle icon. + +## Constraints + +- Keep the fix in the renderer plugin detail page. +- Do not change plugin manifest data or remote-control settings behavior. + +## Non-Goals + +- Redesign the plugin hub layout. +- Add new icon configuration infrastructure. + +## Open Questions + +- None. diff --git a/docs/issues/pr1818-plugin-review-fixes/spec.md b/docs/issues/pr1818-plugin-review-fixes/spec.md new file mode 100644 index 000000000..9969ab51d --- /dev/null +++ b/docs/issues/pr1818-plugin-review-fixes/spec.md @@ -0,0 +1,37 @@ +# PR 1818 Plugin Review Fixes + +## User Need + +Review comments on PR #1818 identify a few plugin-page issues that should be fixed before merge when they are directly related to the recent Plugins Hub work. + +## Goal + +Apply the low-risk plugin-page review fixes that keep the current PR focused: + +- Refresh the embedded Feishu remote settings after the official Feishu plugin is enabled or disabled from the detail page header. +- Make plugin component tests assert concrete localized strings instead of raw i18n keys for titles, headings, and action labels. + +## Acceptance Criteria + +- The official Feishu plugin detail page remounts its embedded `RemoteSettings` after the top enable or disable action updates Feishu remote settings. +- `PluginsCatalogPage` tests use distinct translated values for asserted catalog title, heading, status, and action label keys. 
+- `OfficialPluginDetailPage` tests use distinct translated values for asserted title and action button keys. +- Existing plugin catalog and official plugin detail tests pass. +- `pnpm run format`, `pnpm run i18n`, and `pnpm run lint` pass. + +## Constraints + +- Keep the diff scoped to PR #1818 plugin-page review comments. +- Preserve existing Vue 3 Composition API and i18n patterns. +- Do not mix larger SkillPresenter, SkillSyncPresenter, or git-install hardening work into this fix. + +## Non-Goals + +- No changes to skill install rollback behavior. +- No changes to agent skill adoption/link cleanup. +- No changes to git repository validation. +- No layout redesign. + +## Open Questions + +None. diff --git a/docs/issues/remote-topic-thread-copy/spec.md b/docs/issues/remote-topic-thread-copy/spec.md new file mode 100644 index 000000000..3b5b2d6e5 --- /dev/null +++ b/docs/issues/remote-topic-thread-copy/spec.md @@ -0,0 +1,44 @@ +# Remote Topic Thread Copy + +## User Need + +The Simplified Chinese remote-control copy should not use the awkward phrase `话题线程`. + +## Goal + +Replace `话题线程` with `会话` in the zh-CN remote-control descriptions and access rule. + +## Acceptance Criteria + +- Telegram remote description says `私聊、群聊和会话远程控制`. +- Feishu remote description says `私聊、群聊和会话远程控制`. +- The group access rule says `群聊和会话里`. +- Topic ID field labels remain unchanged. + +## UI Sketch + +Before: + +```text +接入 Telegram Bot,支持私聊、群聊和话题线程远程控制。 +``` + +After: + +```text +接入 Telegram Bot,支持私聊、群聊和会话远程控制。 +``` + +## Constraints + +- zh-CN copy only. +- No behavior or schema changes. + +## Non-Goals + +- Renaming topic/thread IDs. +- Updating non-Chinese locales. + +## Open Questions + +- None. 
diff --git a/docs/issues/sidebar-chat-workspace-sort/spec.md b/docs/issues/sidebar-chat-workspace-sort/spec.md new file mode 100644 index 000000000..4b7181b13 --- /dev/null +++ b/docs/issues/sidebar-chat-workspace-sort/spec.md @@ -0,0 +1,52 @@ +# Sidebar Chat Workspace Sort Spec + +## User Need + +The expanded sidebar must keep Chat and Workspace as separate sections. The Workspace sort toggle +must not move workspace sessions into Chat or make the Workspace section disappear. + +## Goal + +- Chat sessions remain under Chat and can be collapsed. +- Workspace sessions remain under Workspace. +- The Workspace toggle only changes Workspace grouping between project/date modes. + +## Acceptance Criteria + +- Clicking Chat collapses and expands Chat sessions. +- The Chat section icon matches the chat icon used by the new-thread project selector. +- In date mode, date groups for workspace sessions render under Workspace, not Chat. +- Workspace stays visible in the same sidebar position after toggling grouping. +- Pinned sessions stay independent. + +## Constraints + +- Keep the fix local to the renderer sidebar. +- Do not add dependencies or new persistent settings. +- Do not touch unrelated Skills work already dirty in the worktree. + +## Non-Goals + +- Redesign the sidebar. +- Change session storage, pagination, or pin behavior. + +## UI Shape + +Before: + +```text +Pinned +Chat + Recent / Earlier workspace groups +Workspace +``` + +After: + +```text +Pinned +Chat [collapsible] + chat sessions only +Workspace [project/date toggle] + workspace groups only +``` diff --git a/docs/issues/skill-scope-and-refresh/plan.md b/docs/issues/skill-scope-and-refresh/plan.md new file mode 100644 index 000000000..9ff5ea5c0 --- /dev/null +++ b/docs/issues/skill-scope-and-refresh/plan.md @@ -0,0 +1,57 @@ +# Plan + +## Approach + +Split skill state into three semantics: + +1. 
Explicit/manual session activation remains pinned state backed by `SkillPresenter.setActiveSkills` and `new_sessions.active_skills`. +2. Composer-selected skills are message-scoped. The chat input keeps a local draft skill list, submits it with the next message as `SendMessageInput.activeSkills`, then clears the local list after successful submit/queue/steer/create-session. +3. Agent `skill_view` root activation is runtime/message-loop state. The tool returns activation metadata, and the agent runtime keeps a per-generation/per-session set of runtime-activated skill names for the active message loop. + +Do not overload `isPinned` for runtime activation. `isPinned` must mean persisted/manual session pin only. Agent tool results may add separate fields such as `activatedForMessage`, `activationScope`, and `activeForCurrentMessage` to communicate runtime activation without polluting pinned semantics. + +When a message starts with composer active skills, initialize the runtime effective skill set with `manual session skills + message active skills`. When runtime activation changes the effective skill set, refresh both tools and the leading system prompt before the next provider request. + +## Affected Interfaces + +- `SendMessageInput`: add optional `activeSkills?: string[]` for message-scoped composer skill context. +- Route schemas for chat send/steer/pending inputs: accept `activeSkills`. +- `CreateSessionInput`: keep `activeSkills` for compatibility but treat it as initial-message active skills, not session pinning. +- `ChatInputBox` / `useSkillsData`: make selected skills local composer state for existing conversations and expose consume/snapshot helpers. +- `ChatPage` / `NewThreadPage`: include consumed composer skills in submitted message payload and clear after successful submit. +- `AgentSessionPresenter`: stop persisting create-session `activeSkills`; pass them into the initial message payload. 
+- `AgentRuntimePresenter`: initialize runtime message skills from normalized input, persist them on the user message record, materialize them back from normalized user-message tables, and include them in prompt/tool loading for that message loop. +- `SkillTools.handleSkillView`: support viewing without presenter-side activation for agent runtime calls. +- `AgentToolManager`: derive `activationApplied` from runtime active skill context instead of persisted active skills, and return message-scoped activation metadata without setting `isPinned` to true. +- System/tool prompts: replace pinned wording for automatic `skill_view` activation with message-scoped activation wording. + +## Data Flow + +- Composer skill selection updates local input state only. +- Submit path consumes local selected skills and sends `{ text, files, activeSkills }`. +- New session creation passes active skills inside the initial message payload and does not call `setActiveSkills`. +- Runtime start resets runtime activated skills, adds message-scoped active skills, then computes effective skills = session-pinned + message/runtime active. +- User message content stores `activeSkills` so the message context is visible/auditable and retry can reuse it. The normalized materialization path must preserve the raw message-scoped `activeSkills` because the structured `deepchat_user_messages` table stores text/search/think but not skill names. +- `skill_view` root call returns `activatedSkill` when the viewed skill is not already effective. +- Runtime callback adds it to the local runtime set for the active message loop. +- Tool refresh uses `manual + message/runtime` effective skills. +- System prompt refresh rebuilds with the same effective skill set and replaces the first system message in the active conversation messages. +- Tool output keeps `isPinned` equal to the persisted/manual pin state and reports runtime activation separately. 
+ +## Compatibility + +- Existing manual active skills remain stored in `new_sessions.active_skills`. +- Existing `skill_view` route behavior outside agent runtime remains read-only unless explicitly using session active APIs. +- Existing `skill_run` scripts remain gated by active skill names, now including message/runtime activation for the current loop. +- Existing consumers of `isPinned` can continue treating it as pinned/session state. +- Existing stored messages without `activeSkills` continue to parse as empty message-scoped skills. + +## Test Strategy + +- Unit-level tests for `SkillTools.handleSkillView` avoiding persisted activation. +- Unit-level tests for `AgentToolManager` activation metadata and non-pinned output. +- Unit-level tests for `processStream` refreshing tools and system prompt after runtime activation. +- Runtime tests ensuring `SendMessageInput.activeSkills` influences initial prompt/tools but does not persist session active skills. +- Renderer tests ensuring composer skills clear after submit, are sent in the message payload, and appear on the corresponding user message item. +- Message store tests ensuring materialized user message content preserves `activeSkills` from the raw message JSON. +- Run targeted tests plus required repository checks: `pnpm run format`, `pnpm run i18n`, `pnpm run lint`. diff --git a/docs/issues/skill-scope-and-refresh/spec.md b/docs/issues/skill-scope-and-refresh/spec.md new file mode 100644 index 000000000..bcdd86275 --- /dev/null +++ b/docs/issues/skill-scope-and-refresh/spec.md @@ -0,0 +1,59 @@ +# Skill Scope and Refresh + +## User Need + +Skills selected or discovered for a task should follow the message/task chain, not remain fixed in the conversation composer or permanently pollute an entire session. When the user selects a skill in the input box, that skill should be attached to the message being sent, then cleared from the input composer.
When the model calls `skill_view`, the skill can become usable for the current generation flow, but it must not be represented as conversation-pinned state unless the user manually pins it elsewhere. + +## Problem + +Historically, root `skill_view` automatically called `setActiveSkills`, which persisted the skill in `new_sessions.active_skills`. This made model-selected skills follow the whole session instead of the relevant message/task chain. + +The first runtime-scoped fix stopped persistence for agent `skill_view`, but the user-facing composer path still used conversation active skills. Selecting a skill in the input box called `setActiveSkills` for existing sessions, stored the skill at session level, and kept the chip fixed in the input box after sending. That is semantically wrong: composer skill chips are task/message context, not global conversation pinned state. + +After moving composer skills to the outgoing message payload, the message list must still preserve and render that message-scoped metadata. The normalized user-message materialization path must not drop `activeSkills` when rebuilding message content for the renderer. + +In the same generation loop, skill activation must also refresh tool definitions and the system prompt so subsequent provider requests can use the activated skill immediately. + +## Goals + +- Keep explicit/manual session-pinned skills as session-level active skills where APIs still use `setActiveSkills`. +- Make composer-selected skills message-scoped: attach them to the next sent/queued/steered message and clear the composer chip after successful submission. +- Make model-triggered `skill_view` activation message-scoped/runtime-scoped rather than persisted to the session. +- Show message-scoped skills on the corresponding user message item so the user can see which skills were applied to that turn, without placing those chips inside the message bubble body. +- Do not report runtime activation as `isPinned: true`. 
+- Expose explicit activation metadata such as `activationScope: "message"` / `activatedForMessage: true` for agent `skill_view` results. +- Ensure runtime skill activation refreshes tool definitions and the active skill prompt for subsequent provider requests in the same generation loop. +- Preserve existing manual skills settings behavior and new-thread/session creation compatibility. + +## Acceptance Criteria + +- Calling agent tool `skill_view` for a root `SKILL.md` no longer writes to `new_sessions.active_skills`. +- The `skill_view` content for runtime activation does not claim `isPinned: true` unless the skill was manually/session pinned. +- The `skill_view` result clearly reports current-message activation when applicable. +- Selecting a skill in the chat input does not call `setActiveSkills` for the conversation. +- Sending, queueing, steering, or creating a new first-turn session with composer skills sends those skills on that message payload. +- After successful submission, composer skill chips are cleared from the input box. +- User message records preserve their message-scoped skills when they are created, materialized from normalized tables, cloned, edited, or backfilled. +- User message items display their message-scoped skills as lightweight metadata adjacent to the bubble, not inside the bubble content, and do not show them as composer/session pinned skills. +- System prompt wording distinguishes session-pinned skills from message-activated skills and does not tell the model that root `skill_view` pins a skill to the conversation. +- Subsequent provider requests in the same tool loop receive rebuilt system prompt content that includes message-scoped/runtime-activated skills. +- `skill_run` exposure after runtime activation uses the union of manually active and message/runtime-activated skills. +- Manual `setActiveSkills` continues to persist active skills at the session level. 
+- Relevant tests cover non-persistence, non-pinned output, message payload skills, message item visibility, and same-loop refresh behavior where practical. + +## Non-goals + +- Full branch/lineage-scoped persisted skill state across future turns. +- Redesigning the settings skills UI. +- Adding a new debug UI for skills. +- Changing MCP tool permissions or authentication behavior. + +## Constraints + +- Keep changes minimal and aligned with existing presenter boundaries. +- Avoid introducing secret logging or broad trace payloads. +- Do not remove manual session-level skill support. + +## Open Questions + +None. diff --git a/docs/issues/skill-scope-and-refresh/tasks.md b/docs/issues/skill-scope-and-refresh/tasks.md new file mode 100644 index 000000000..4a91da272 --- /dev/null +++ b/docs/issues/skill-scope-and-refresh/tasks.md @@ -0,0 +1,20 @@ +# Tasks + +- [x] Update skill tool view path so agent `skill_view` does not persist active skills. +- [x] Add runtime activation context for effective skills during a generation loop. +- [x] Refresh tools using effective manual + runtime skills. +- [x] Refresh leading system prompt after runtime skill activation. +- [x] Stop representing runtime activation as conversation-pinned state in tool output and prompts. +- [x] Add/adjust tests for message-scoped activation semantics. +- [x] Add message-scoped `SendMessageInput.activeSkills` plumbing. +- [x] Change composer skill selection to local message draft state instead of session active state. +- [x] Send/queue/steer/create first-turn messages with composer skills and clear chips after submit. +- [x] Initialize runtime effective skills from message active skills and persist them on the user message record. +- [x] Preserve `activeSkills` when user messages are materialized from normalized message tables. +- [x] Display message-scoped skills on the corresponding user message item and cover it with renderer tests. 
+- [x] Move message-scoped skill chips out of the message bubble and restyle them as subtle message metadata. +- [x] Add renderer/runtime tests for composer message-scoped skills. +- [x] Run `pnpm run format`, `pnpm run i18n`, and `pnpm run lint` after the visibility fix. + +- [x] Address review feedback for active skill fallback data, wording, refresh parameter flow, and composer naming clarity. +- [x] Strengthen process stream coverage for hook-backed message-scope skill activation refresh. diff --git a/docs/issues/vueuse-typecheck-dedupe/plan.md b/docs/issues/vueuse-typecheck-dedupe/plan.md new file mode 100644 index 000000000..b2d25c2aa --- /dev/null +++ b/docs/issues/vueuse-typecheck-dedupe/plan.md @@ -0,0 +1,17 @@ +# Plan + +## Cause + +The root project depends on `@vueuse/core@12.8.2`, which depends on its own `vue@3.5.34`. The app and newer UI dependencies use `vue@3.5.39`, so `vue-tsgo` sees incompatible `Ref` and `ComputedRef` symbols across the graph. + +## Implementation + +- Update the root `@vueuse/core` dev dependency to `^14.3.0`, matching the version already used by `reka-ui`. +- Regenerate the lockfile with pnpm 10. +- Keep code unchanged unless typecheck reveals a real API incompatibility. + +## Test strategy + +- Run `pnpm why vue @vueuse/core @vueuse/shared` to confirm dedupe. +- Run `pnpm run typecheck`. +- Re-run release-required `format`, `i18n`, and `lint`. diff --git a/docs/issues/vueuse-typecheck-dedupe/spec.md b/docs/issues/vueuse-typecheck-dedupe/spec.md new file mode 100644 index 000000000..2f6acd930 --- /dev/null +++ b/docs/issues/vueuse-typecheck-dedupe/spec.md @@ -0,0 +1,30 @@ +# VueUse Typecheck Dedupe + +## User need + +Release checks should not fail because renderer type declarations load two different Vue patch versions. + +## Goal + +Keep the renderer dependency graph on one Vue type identity so `pnpm run typecheck` can complete before cutting the beta release branch.
+ +## Acceptance criteria + +- `pnpm why vue @vueuse/core @vueuse/shared` no longer shows root `@vueuse/core` pulling `vue@3.5.34`. +- `pnpm run typecheck` passes or fails only on unrelated issues. +- The fix does not add a new dependency. + +## Constraints + +- Use an already-installed dependency version when possible. +- Do not change renderer behavior. +- Keep release metadata separate from this dependency fix. + +## Non-goals + +- Broad dependency refresh. +- Refactoring Vue components or composables. + +## Open questions + +None. diff --git a/docs/issues/vueuse-typecheck-dedupe/tasks.md b/docs/issues/vueuse-typecheck-dedupe/tasks.md new file mode 100644 index 000000000..c57952764 --- /dev/null +++ b/docs/issues/vueuse-typecheck-dedupe/tasks.md @@ -0,0 +1,7 @@ +# Tasks + +- [x] Identify duplicate Vue type source in the dependency graph. +- [x] Document the issue and minimal dependency plan. +- [x] Align root `@vueuse/core` with the existing Vue peer version. +- [x] Validate dependency graph and typecheck. +- [x] Re-run release-required checks. diff --git a/docs/issues/windows-arm64-duckdb-upgrade/spec.md b/docs/issues/windows-arm64-duckdb-upgrade/spec.md index aeb6c662b..cb22c92c2 100644 --- a/docs/issues/windows-arm64-duckdb-upgrade/spec.md +++ b/docs/issues/windows-arm64-duckdb-upgrade/spec.md @@ -24,8 +24,8 @@ That package version only ships native bindings for: It does not ship a `win32-arm64` binding, so the app crashed while loading the built-in knowledge base presenter on Windows ARM64. -DeepChat now depends on `@duckdb/node-api@1.5.3-r.1`. The matching lockfile includes -`@duckdb/node-bindings-win32-arm64@1.5.3-r.1`, and the Windows ARM64 workflow runs +DeepChat now depends on `@duckdb/node-api@1.5.4-r.1`. The matching lockfile includes +`@duckdb/node-bindings-win32-arm64@1.5.4-r.1`, and the Windows ARM64 workflow runs `pnpm run smoke:duckdb:vss` before app launch smoke coverage. 
## User Stories diff --git a/package.json b/package.json index c20dbd759..d5f548691 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "DeepChat", - "version": "1.0.7", + "version": "1.0.8-beta.1", "description": "DeepChat,一个简单易用的 Agent 客户端", "main": "./out/main/index.js", "author": "ThinkInAIXYZ", @@ -52,15 +52,15 @@ "plugin:cua:build:win:arm64": "node scripts/build-cua-plugin-runtime.mjs --platform win32 --arch arm64", "plugin:cua:build:linux:x64": "node scripts/build-cua-plugin-runtime.mjs --platform linux --arch x64", "install:sharp": "node scripts/install-sharp-for-platform.js", - "build:mac": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform darwin && pnpm run plugin:bundle -- --name feishu --platform darwin && electron-builder --mac", - "build:mac:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform darwin --arch arm64 && pnpm run plugin:bundle -- --name feishu --platform darwin --arch arm64 && electron-builder --mac --arm64", - "build:mac:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform darwin --arch x64 && pnpm run plugin:bundle -- --name feishu --platform darwin --arch x64 && electron-builder --mac --x64", - "build:win": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform win32 && pnpm run plugin:bundle -- --name feishu --platform win32 && electron-builder --win", - "build:win:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform win32 --arch x64 && pnpm run plugin:bundle -- --name feishu --platform win32 --arch x64 && electron-builder --win --x64", - "build:win:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform win32 --arch arm64 && pnpm run plugin:bundle -- --name feishu --platform win32 --arch arm64 && electron-builder --win 
--arm64", - "build:linux": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform linux && pnpm run plugin:bundle -- --name feishu --platform linux && electron-builder --linux", - "build:linux:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform linux --arch x64 && pnpm run plugin:bundle -- --name feishu --platform linux --arch x64 && electron-builder --linux --x64", - "build:linux:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform linux --arch arm64 && electron-builder --linux --arm64", + "build:mac": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform darwin && pnpm run plugin:bundle -- --name feishu --platform darwin && pnpm run installRuntime:duckdb:vss:mac && electron-builder --mac", + "build:mac:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform darwin --arch arm64 && pnpm run plugin:bundle -- --name feishu --platform darwin --arch arm64 && pnpm run installRuntime:duckdb:vss:mac:arm64 && electron-builder --mac --arm64", + "build:mac:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform darwin --arch x64 && pnpm run plugin:bundle -- --name feishu --platform darwin --arch x64 && pnpm run installRuntime:duckdb:vss:mac:x64 && electron-builder --mac --x64", + "build:win": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform win32 && pnpm run plugin:bundle -- --name feishu --platform win32 && pnpm run installRuntime:duckdb:vss:win && electron-builder --win", + "build:win:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform win32 --arch x64 && pnpm run plugin:bundle -- --name feishu --platform win32 --arch x64 && pnpm run installRuntime:duckdb:vss:win:x64 && 
electron-builder --win --x64", + "build:win:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform win32 --arch arm64 && pnpm run plugin:bundle -- --name feishu --platform win32 --arch arm64 && pnpm run installRuntime:duckdb:vss:win:arm64 && electron-builder --win --arm64", + "build:linux": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform linux && pnpm run plugin:bundle -- --name feishu --platform linux && pnpm run installRuntime:duckdb:vss:linux && electron-builder --linux", + "build:linux:x64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name cua --platform linux --arch x64 && pnpm run plugin:bundle -- --name feishu --platform linux --arch x64 && pnpm run installRuntime:duckdb:vss:linux:x64 && electron-builder --linux --x64", + "build:linux:arm64": "pnpm run build && pnpm run plugin:bundle:clean && pnpm run plugin:bundle -- --name feishu --platform linux --arch arm64 && pnpm run installRuntime:duckdb:vss:linux:arm64 && electron-builder --linux --arm64", "afterSign": "scripts/notarize.js", "installRuntime": "npx -y tiny-runtime-injector --type uv --dir ./runtime/uv --runtime-version 0.9.18 && npx -y tiny-runtime-injector --type node --dir ./runtime/node && npx -y tiny-runtime-injector --type rtk --dir ./runtime/rtk", "installRuntime:win:x64": "npx -y tiny-runtime-injector --type uv --dir ./runtime/uv --runtime-version 0.9.18 -a x64 -p win32 && npx -y tiny-runtime-injector --type node --dir ./runtime/node -a x64 -p win32 && npx -y tiny-runtime-injector --type rtk --dir ./runtime/rtk -a x64 -p win32", @@ -70,6 +70,15 @@ "installRuntime:linux:x64": "npx -y tiny-runtime-injector --type uv --dir ./runtime/uv --runtime-version 0.9.18 -a x64 -p linux && npx -y tiny-runtime-injector --type node --dir ./runtime/node -a x64 -p linux && npx -y tiny-runtime-injector --type rtk --dir ./runtime/rtk -a x64 -p linux", 
"installRuntime:linux:arm64": "npx -y tiny-runtime-injector --type uv --dir ./runtime/uv --runtime-version 0.9.18 -a arm64 -p linux && npx -y tiny-runtime-injector --type node --dir ./runtime/node -a arm64 -p linux && npx -y tiny-runtime-injector --type rtk --dir ./runtime/rtk -a arm64 -p linux", "installRuntime:duckdb:vss": "node scripts/installVss.js", + "installRuntime:duckdb:vss:mac": "node scripts/installVss.js --platform darwin", + "installRuntime:duckdb:vss:mac:arm64": "node scripts/installVss.js --platform darwin --arch arm64", + "installRuntime:duckdb:vss:mac:x64": "node scripts/installVss.js --platform darwin --arch x64", + "installRuntime:duckdb:vss:win": "node scripts/installVss.js --platform win32", + "installRuntime:duckdb:vss:win:x64": "node scripts/installVss.js --platform win32 --arch x64", + "installRuntime:duckdb:vss:win:arm64": "node scripts/installVss.js --platform win32 --arch arm64", + "installRuntime:duckdb:vss:linux": "node scripts/installVss.js --platform linux", + "installRuntime:duckdb:vss:linux:x64": "node scripts/installVss.js --platform linux --arch x64", + "installRuntime:duckdb:vss:linux:arm64": "node scripts/installVss.js --platform linux --arch arm64", "smoke:duckdb:vss": "node scripts/smoke-duckdb-vss.js", "i18n": "i18n-check -s zh-CN -f i18next --locales src/renderer/src/i18n", "i18n:en": "i18n-check -s en-US -f i18next --locales src/renderer/src/i18n", @@ -79,29 +88,26 @@ }, "dependencies": { "@agentclientprotocol/sdk": "^0.16.1", - "@ai-sdk/amazon-bedrock": "^4.0.121", - "@ai-sdk/anthropic": "^3.0.86", - "@ai-sdk/azure": "^3.0.77", - "@ai-sdk/google": "^3.0.83", - "@ai-sdk/google-vertex": "^4.0.149", - "@ai-sdk/openai": "^3.0.74", - "@ai-sdk/openai-compatible": "^2.0.51", - "@ai-sdk/provider": "^3.0.10", + "@ai-sdk/amazon-bedrock": "^5.0.2", + "@ai-sdk/anthropic": "^4.0.1", + "@ai-sdk/azure": "^4.0.2", + "@ai-sdk/google": "^4.0.2", + "@ai-sdk/google-vertex": "^5.0.2", + "@ai-sdk/openai": "^4.0.2", + 
"@ai-sdk/openai-compatible": "^3.0.1", + "@ai-sdk/provider": "^4.0.0", "@aws-sdk/client-bedrock": "^3.1057.0", "@aws-sdk/credential-providers": "^3.1057.0", - "@duckdb/node-api": "1.5.3-r.1", - "@e2b/code-interpreter": "^1.5.1", - "@electron-toolkit/preload": "^3.0.2", + "@duckdb/node-api": "1.5.4-r.1", "@electron-toolkit/utils": "^4.0.0", "@ff-labs/fff-node": "^0.9.3", "@jxa/run": "^1.4.0", "@larksuiteoapi/node-sdk": "^1.64.0", "@modelcontextprotocol/sdk": "^1.29.0", "@parcel/watcher": "^2.5.6", - "ai": "^6.0.209", + "ai": "^7.0.4", "axios": "^1.16.1", "better-sqlite3-multiple-ciphers": "12.9.0", - "cheerio": "^1.2.0", "compare-versions": "^6.1.1", "cross-spawn": "^7.0.6", "diff": "^8.0.4", @@ -119,10 +125,11 @@ "level": "^8.0.1", "mammoth": "^1.12.0", "nanoid": "^5.1.11", - "node-pty": "^1.1.0", + "node-pty": "1.2.0-beta.14", "ollama": "^0.6.3", "opendal": "^0.49.4", "pdf-parse-new": "^1.4.1", + "qrcode": "^1.5.4", "run-applescript": "^7.1.0", "safe-regex2": "^5.1.1", "sharp": "^0.34.5", @@ -131,7 +138,7 @@ "undici": "^7.25.0", "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz", "xml2js": "^0.6.2", - "zod": "^3.25.76" + "zod": "^4.4.3" }, "devDependencies": { "@antv/infographic": "^0.2.19", @@ -141,12 +148,12 @@ "@iconify-json/vscode-icons": "^1.2.49", "@iconify/vue": "^5.0.1", "@lingual/i18n-check": "0.8.12", + "@lucide/vue": "^1.22.0", "@pinia/colada": "^0.20.0", "@playwright/test": "^1.60.0", "@tailwindcss/typography": "^0.5.19", "@tailwindcss/vite": "^4.3.0", "@tiptap/core": "^2.27.2", - "@tiptap/extension-code-block": "^2.27.2", "@tiptap/extension-document": "^2.27.2", "@tiptap/extension-hard-break": "^2.27.2", "@tiptap/extension-history": "^2.27.2", @@ -157,38 +164,32 @@ "@tiptap/pm": "^2.27.2", "@tiptap/suggestion": "^2.27.2", "@tiptap/vue-3": "^2.27.2", - "@types/better-sqlite3": "^7.6.13", - "@types/mime-types": "^3.0.1", - "@types/node": "^24.12.4", - "@types/xlsx": "^0.0.35", - "@typescript/native-preview": "7.0.0-dev.20260518.1", + 
"@types/node": "^24.13.2", + "@types/qrcode": "^1.5.6", + "@typescript/native-preview": "7.0.0-dev.20260626.1", "@unovis/ts": "1.6.4", "@unovis/vue": "1.6.4", - "@vee-validate/zod": "^4.15.1", "@vitejs/plugin-vue": "^6.0.7", "@vitest/ui": "^3.2.4", "@vue/test-utils": "^2.4.10", - "@vueuse/core": "^12.8.2", - "@xterm/addon-fit": "^0.10.0", + "@vueuse/core": "^14.3.0", "@xterm/xterm": "^5.5.0", - "autoprefixer": "^10.5.0", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "cross-env": "^10.1.0", "dayjs": "^1.11.20", - "electron": "^40.10.0", + "electron": "40.10.5", "electron-builder": "26.9.0", "electron-vite": "5.0.0", "jsdom": "^26.1.0", "katex": "^0.16.47", "lint-staged": "^16.4.0", - "lucide-vue-next": "^0.544.0", "markstream-vue": "1.0.4", "mermaid": "^11.15.0", "minimatch": "^10.2.5", "monaco-editor": "^0.55.1", - "oxfmt": "^0.42.0", - "oxlint": "^1.65.0", + "oxfmt": "^0.56.0", + "oxlint": "^1.71.0", "picocolors": "^1.1.1", "pinia": "^3.0.4", "reka-ui": "^2.9.7", @@ -201,22 +202,21 @@ "taze": "^19.12.0", "tippy.js": "^6.3.7", "tw-animate-css": "^1.4.0", - "typescript": "^5.9.3", + "typescript": "^6.0.3", "vee-validate": "^4.15.1", "vite": "^7.3.3", "vite-plugin-monaco-editor-esm": "^2.0.2", "vite-plugin-vue-devtools": "^8.1.2", "vite-svg-loader": "^5.1.1", "vitest": "^3.2.4", - "vue": "^3.5.34", + "vue": "^3.5.39", "vue-i18n": "^11.4.4", "vue-router": "4", "vue-sonner": "^2.0.9", "vue-tsgo": "0.2.2", "vue-virtual-scroller": "^3.0.3", "vuedraggable": "^4.1.0", - "yaml": "^2.9.0", - "zod-to-json-schema": "^3.25.2" + "yaml": "^2.9.0" }, "optionalDependencies": { "@opendal/lib-darwin-arm64": "0.49.4", diff --git a/resources/acp-registry/registry.json b/resources/acp-registry/registry.json index 6bcd6ec0b..1103f9dad 100644 --- a/resources/acp-registry/registry.json +++ b/resources/acp-registry/registry.json @@ -61,7 +61,7 @@ { "id": "auggie", "name": "Auggie CLI", - "version": "0.30.0", + "version": "0.31.0", "description": "Augment Code's powerful software 
agent, backed by industry-leading context engine", "repository": "https://github.com/augmentcode/auggie", "website": "https://www.augmentcode.com/", @@ -72,7 +72,7 @@ "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/auggie.svg", "distribution": { "npx": { - "package": "@augmentcode/auggie@0.30.0", + "package": "@augmentcode/auggie@0.31.0", "args": [ "--acp" ], @@ -103,7 +103,7 @@ { "id": "claude-acp", "name": "Claude Agent", - "version": "0.51.0", + "version": "0.52.0", "description": "ACP wrapper for Anthropic's Claude", "repository": "https://github.com/agentclientprotocol/claude-agent-acp", "authors": [ @@ -114,7 +114,7 @@ "license": "proprietary", "distribution": { "npx": { - "package": "@agentclientprotocol/claude-agent-acp@0.51.0" + "package": "@agentclientprotocol/claude-agent-acp@0.52.0" } }, "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/claude-acp.svg" @@ -122,7 +122,7 @@ { "id": "cline", "name": "Cline", - "version": "3.0.29", + "version": "3.0.31", "description": "Autonomous coding agent CLI - capable of creating/editing files, running commands, using the browser, and more", "repository": "https://github.com/cline/cline", "website": "https://cline.bot/cli", @@ -133,7 +133,7 @@ "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/cline.svg", "distribution": { "npx": { - "package": "cline@3.0.29", + "package": "cline@3.0.31", "args": [ "--acp" ] @@ -163,7 +163,7 @@ { "id": "codex-acp", "name": "Codex", - "version": "1.0.0", + "version": "1.0.1", "description": "ACP adapter for OpenAI's coding assistant", "repository": "https://github.com/agentclientprotocol/codex-acp", "authors": [ @@ -174,7 +174,7 @@ "license": "Apache-2.0", "distribution": { "npx": { - "package": "@agentclientprotocol/codex-acp@1.0.0" + "package": "@agentclientprotocol/codex-acp@1.0.1" } }, "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/codex-acp.svg" @@ -331,7 +331,7 @@ { "id": "cursor", "name": "Cursor", - "version": 
"2026.06.24", + "version": "2026.06.26", "description": "Cursor's coding agent", "website": "https://cursor.com/docs/cli/acp", "authors": [ @@ -341,42 +341,42 @@ "distribution": { "binary": { "darwin-aarch64": { - "archive": "https://downloads.cursor.com/lab/2026.06.24-00-45-58-9f61de7/darwin/arm64/agent-cli-package.tar.gz", + "archive": "https://downloads.cursor.com/lab/2026.06.26-7079533/darwin/arm64/agent-cli-package.tar.gz", "cmd": "./dist-package/cursor-agent", "args": [ "acp" ] }, "darwin-x86_64": { - "archive": "https://downloads.cursor.com/lab/2026.06.24-00-45-58-9f61de7/darwin/x64/agent-cli-package.tar.gz", + "archive": "https://downloads.cursor.com/lab/2026.06.26-7079533/darwin/x64/agent-cli-package.tar.gz", "cmd": "./dist-package/cursor-agent", "args": [ "acp" ] }, "linux-aarch64": { - "archive": "https://downloads.cursor.com/lab/2026.06.24-00-45-58-9f61de7/linux/arm64/agent-cli-package.tar.gz", + "archive": "https://downloads.cursor.com/lab/2026.06.26-7079533/linux/arm64/agent-cli-package.tar.gz", "cmd": "./dist-package/cursor-agent", "args": [ "acp" ] }, "linux-x86_64": { - "archive": "https://downloads.cursor.com/lab/2026.06.24-00-45-58-9f61de7/linux/x64/agent-cli-package.tar.gz", + "archive": "https://downloads.cursor.com/lab/2026.06.26-7079533/linux/x64/agent-cli-package.tar.gz", "cmd": "./dist-package/cursor-agent", "args": [ "acp" ] }, "windows-aarch64": { - "archive": "https://downloads.cursor.com/lab/2026.06.24-00-45-58-9f61de7/windows/arm64/agent-cli-package.zip", + "archive": "https://downloads.cursor.com/lab/2026.06.26-7079533/windows/arm64/agent-cli-package.zip", "cmd": "./dist-package\\cursor-agent.cmd", "args": [ "acp" ] }, "windows-x86_64": { - "archive": "https://downloads.cursor.com/lab/2026.06.24-00-45-58-9f61de7/windows/x64/agent-cli-package.zip", + "archive": "https://downloads.cursor.com/lab/2026.06.26-7079533/windows/x64/agent-cli-package.zip", "cmd": "./dist-package\\cursor-agent.cmd", "args": [ "acp" @@ -467,7 +467,7 @@ { "id": 
"dimcode", "name": "DimCode", - "version": "0.2.10", + "version": "0.2.11", "description": "A coding agent that puts leading models at your command.", "website": "https://dimcode.dev/docs/acp.html", "authors": [ @@ -476,7 +476,7 @@ "license": "proprietary", "distribution": { "npx": { - "package": "dimcode@0.2.10", + "package": "dimcode@0.2.11", "args": [ "acp" ] @@ -487,7 +487,7 @@ { "id": "dirac", "name": "Dirac", - "version": "0.4.9", + "version": "0.4.10", "description": "Reduces API costs by more than 50%, produces better and faster work. Uses Hash anchored parallel edits, AST manipulation and a whole lot of neat optimizations. Fully Open Source.", "repository": "https://github.com/dirac-run/dirac", "website": "https://dirac.run", @@ -498,7 +498,7 @@ "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/dirac.svg", "distribution": { "npx": { - "package": "dirac-cli@0.4.9", + "package": "dirac-cli@0.4.10", "args": [ "--acp" ] @@ -508,7 +508,7 @@ { "id": "factory-droid", "name": "Factory Droid", - "version": "0.158.0", + "version": "0.159.1", "description": "Factory Droid - AI coding agent powered by Factory AI", "website": "https://factory.ai/product/cli", "authors": [ @@ -517,7 +517,7 @@ "license": "proprietary", "distribution": { "npx": { - "package": "droid@0.158.0", + "package": "droid@0.159.1", "args": [ "exec", "--output-format", @@ -534,7 +534,7 @@ { "id": "fast-agent", "name": "fast-agent", - "version": "0.7.21", + "version": "0.8.0", "description": "Code and build agents with comprehensive multi-provider support", "repository": "https://github.com/evalstate/fast-agent", "website": "https://fast-agent.ai", @@ -544,7 +544,7 @@ "license": "Apache 2.0", "distribution": { "uvx": { - "package": "fast-agent-acp==0.7.21", + "package": "fast-agent-acp==0.8.0", "args": [ "-x" ] @@ -555,7 +555,7 @@ { "id": "gemini", "name": "Gemini CLI", - "version": "0.47.0", + "version": "0.49.0", "description": "Google's official CLI for Gemini", "repository": 
"https://github.com/google-gemini/gemini-cli", "website": "https://geminicli.com", @@ -565,7 +565,7 @@ "license": "Apache-2.0", "distribution": { "npx": { - "package": "@google/gemini-cli@0.47.0", + "package": "@google/gemini-cli@0.49.0", "args": [ "--acp" ] @@ -614,7 +614,7 @@ { "id": "goose", "name": "goose", - "version": "1.38.0", + "version": "1.39.0", "description": "A local, extensible, open source AI agent that automates engineering tasks", "repository": "https://github.com/block/goose", "website": "https://block.github.io/goose/", @@ -625,35 +625,35 @@ "distribution": { "binary": { "darwin-aarch64": { - "archive": "https://github.com/block/goose/releases/download/v1.38.0/goose-aarch64-apple-darwin.tar.bz2", + "archive": "https://github.com/block/goose/releases/download/v1.39.0/goose-aarch64-apple-darwin.tar.bz2", "cmd": "./goose", "args": [ "acp" ] }, "darwin-x86_64": { - "archive": "https://github.com/block/goose/releases/download/v1.38.0/goose-x86_64-apple-darwin.tar.bz2", + "archive": "https://github.com/block/goose/releases/download/v1.39.0/goose-x86_64-apple-darwin.tar.bz2", "cmd": "./goose", "args": [ "acp" ] }, "linux-aarch64": { - "archive": "https://github.com/block/goose/releases/download/v1.38.0/goose-aarch64-unknown-linux-gnu.tar.bz2", + "archive": "https://github.com/block/goose/releases/download/v1.39.0/goose-aarch64-unknown-linux-gnu.tar.bz2", "cmd": "./goose", "args": [ "acp" ] }, "linux-x86_64": { - "archive": "https://github.com/block/goose/releases/download/v1.38.0/goose-x86_64-unknown-linux-gnu.tar.bz2", + "archive": "https://github.com/block/goose/releases/download/v1.39.0/goose-x86_64-unknown-linux-gnu.tar.bz2", "cmd": "./goose", "args": [ "acp" ] }, "windows-x86_64": { - "archive": "https://github.com/block/goose/releases/download/v1.38.0/goose-x86_64-pc-windows-msvc.zip", + "archive": "https://github.com/block/goose/releases/download/v1.39.0/goose-x86_64-pc-windows-msvc.zip", "cmd": "./goose-package\\goose.exe", "args": [ "acp" @@ 
-666,7 +666,7 @@ { "id": "grok-build", "name": "Grok Build", - "version": "0.2.65", + "version": "0.2.73", "description": "xAI's coding agent and CLI", "website": "https://x.ai/cli", "authors": [ @@ -675,7 +675,7 @@ "license": "proprietary", "distribution": { "npx": { - "package": "@xai-official/grok@0.2.65", + "package": "@xai-official/grok@0.2.73", "args": [ "agent", "stdio" @@ -687,7 +687,7 @@ { "id": "junie", "name": "Junie", - "version": "1966.57.0", + "version": "2045.46.0", "description": "AI Coding Agent by JetBrains", "repository": "https://github.com/JetBrains/junie", "website": "https://junie.jetbrains.com", @@ -698,35 +698,35 @@ "distribution": { "binary": { "darwin-aarch64": { - "archive": "https://github.com/JetBrains/junie/releases/download/1966.57/junie-release-1966.57-macos-aarch64.zip", + "archive": "https://github.com/JetBrains/junie/releases/download/2045.46/junie-release-2045.46-macos-aarch64.zip", "cmd": "./Applications/junie.app/Contents/MacOS/junie", "args": [ "--acp=true" ] }, "darwin-x86_64": { - "archive": "https://github.com/JetBrains/junie/releases/download/1966.57/junie-release-1966.57-macos-amd64.zip", + "archive": "https://github.com/JetBrains/junie/releases/download/2045.46/junie-release-2045.46-macos-amd64.zip", "cmd": "./Applications/junie.app/Contents/MacOS/junie", "args": [ "--acp=true" ] }, "linux-aarch64": { - "archive": "https://github.com/JetBrains/junie/releases/download/1966.57/junie-release-1966.57-linux-aarch64.zip", + "archive": "https://github.com/JetBrains/junie/releases/download/2045.46/junie-release-2045.46-linux-aarch64.zip", "cmd": "./junie-app/bin/junie", "args": [ "--acp=true" ] }, "linux-x86_64": { - "archive": "https://github.com/JetBrains/junie/releases/download/1966.57/junie-release-1966.57-linux-amd64.zip", + "archive": "https://github.com/JetBrains/junie/releases/download/2045.46/junie-release-2045.46-linux-amd64.zip", "cmd": "./junie-app/bin/junie", "args": [ "--acp=true" ] }, "windows-x86_64": { - 
"archive": "https://github.com/JetBrains/junie/releases/download/1966.57/junie-release-1966.57-windows-amd64.zip", + "archive": "https://github.com/JetBrains/junie/releases/download/2045.46/junie-release-2045.46-windows-amd64.zip", "cmd": "./junie/junie.exe", "args": [ "--acp=true" @@ -869,7 +869,7 @@ { "id": "mistral-vibe", "name": "Mistral Vibe", - "version": "2.17.1", + "version": "2.18.0", "description": "Mistral's open-source coding assistant", "repository": "https://github.com/mistralai/mistral-vibe", "website": "https://mistral.ai/products/vibe", @@ -881,23 +881,23 @@ "distribution": { "binary": { "darwin-aarch64": { - "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.17.1/vibe-acp-darwin-aarch64-2.17.1.zip", + "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.18.0/vibe-acp-darwin-aarch64-2.18.0.zip", "cmd": "./vibe-acp" }, "darwin-x86_64": { - "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.17.1/vibe-acp-darwin-x86_64-2.17.1.zip", + "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.18.0/vibe-acp-darwin-x86_64-2.18.0.zip", "cmd": "./vibe-acp" }, "linux-aarch64": { - "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.17.1/vibe-acp-linux-aarch64-2.17.1.zip", + "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.18.0/vibe-acp-linux-aarch64-2.18.0.zip", "cmd": "./vibe-acp" }, "linux-x86_64": { - "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.17.1/vibe-acp-linux-x86_64-2.17.1.zip", + "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.18.0/vibe-acp-linux-x86_64-2.18.0.zip", "cmd": "./vibe-acp" }, "windows-x86_64": { - "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.17.1/vibe-acp-windows-x86_64-2.17.1.zip", + "archive": "https://github.com/mistralai/mistral-vibe/releases/download/v2.18.0/vibe-acp-windows-x86_64-2.18.0.zip", "cmd": 
"./vibe-acp.exe" } } @@ -906,7 +906,7 @@ { "id": "nova", "name": "Nova", - "version": "1.1.20", + "version": "1.1.21", "description": "Nova by Compass AI - a fully-fledged software engineer at your command", "repository": "https://github.com/Compass-Agentic-Platform/nova", "website": "https://www.compassap.ai/portfolio/nova.html", @@ -917,7 +917,7 @@ "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/nova.svg", "distribution": { "npx": { - "package": "@compass-ai/nova@1.1.20", + "package": "@compass-ai/nova@1.1.21", "args": [ "acp" ] @@ -927,7 +927,7 @@ { "id": "opencode", "name": "OpenCode", - "version": "1.17.10", + "version": "1.17.11", "description": "The open source coding agent", "repository": "https://github.com/anomalyco/opencode", "website": "https://opencode.ai", @@ -939,42 +939,42 @@ "distribution": { "binary": { "darwin-aarch64": { - "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.10/opencode-darwin-arm64.zip", + "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.11/opencode-darwin-arm64.zip", "cmd": "./opencode", "args": [ "acp" ] }, "darwin-x86_64": { - "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.10/opencode-darwin-x64.zip", + "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.11/opencode-darwin-x64.zip", "cmd": "./opencode", "args": [ "acp" ] }, "linux-aarch64": { - "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.10/opencode-linux-arm64.tar.gz", + "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.11/opencode-linux-arm64.tar.gz", "cmd": "./opencode", "args": [ "acp" ] }, "linux-x86_64": { - "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.10/opencode-linux-x64.tar.gz", + "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.11/opencode-linux-x64.tar.gz", "cmd": "./opencode", "args": [ "acp" ] }, "windows-aarch64": { - "archive": 
"https://github.com/anomalyco/opencode/releases/download/v1.17.10/opencode-windows-arm64.zip", + "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.11/opencode-windows-arm64.zip", "cmd": "./opencode", "args": [ "acp" ] }, "windows-x86_64": { - "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.10/opencode-windows-x64.zip", + "archive": "https://github.com/anomalyco/opencode/releases/download/v1.17.11/opencode-windows-x64.zip", "cmd": "./opencode.exe", "args": [ "acp" @@ -1003,7 +1003,7 @@ { "id": "poolside", "name": "Poolside", - "version": "1.0.6", + "version": "1.0.7", "description": "Poolside's coding agent", "repository": "https://github.com/poolsideai/pool", "website": "https://poolside.ai", @@ -1014,42 +1014,42 @@ "distribution": { "binary": { "darwin-aarch64": { - "archive": "https://downloads.poolside.ai/pool/v1.0.6/pool-darwin-arm64.tar.gz", + "archive": "https://downloads.poolside.ai/pool/v1.0.7/pool-darwin-arm64.tar.gz", "cmd": "./pool-darwin-arm64", "args": [ "acp" ] }, "darwin-x86_64": { - "archive": "https://downloads.poolside.ai/pool/v1.0.6/pool-darwin-amd64.tar.gz", + "archive": "https://downloads.poolside.ai/pool/v1.0.7/pool-darwin-amd64.tar.gz", "cmd": "./pool-darwin-amd64", "args": [ "acp" ] }, "linux-aarch64": { - "archive": "https://downloads.poolside.ai/pool/v1.0.6/pool-linux-arm64.tar.gz", + "archive": "https://downloads.poolside.ai/pool/v1.0.7/pool-linux-arm64.tar.gz", "cmd": "./pool-linux-arm64", "args": [ "acp" ] }, "linux-x86_64": { - "archive": "https://downloads.poolside.ai/pool/v1.0.6/pool-linux-amd64.tar.gz", + "archive": "https://downloads.poolside.ai/pool/v1.0.7/pool-linux-amd64.tar.gz", "cmd": "./pool-linux-amd64", "args": [ "acp" ] }, "windows-aarch64": { - "archive": "https://downloads.poolside.ai/pool/v1.0.6/pool-windows-arm64.tar.gz", + "archive": "https://downloads.poolside.ai/pool/v1.0.7/pool-windows-arm64.tar.gz", "cmd": "./pool-windows-arm64.exe", "args": [ "acp" ] }, 
"windows-x86_64": { - "archive": "https://downloads.poolside.ai/pool/v1.0.6/pool-windows-amd64.tar.gz", + "archive": "https://downloads.poolside.ai/pool/v1.0.7/pool-windows-amd64.tar.gz", "cmd": "./pool-windows-amd64.exe", "args": [ "acp" @@ -1082,7 +1082,7 @@ { "id": "qwen-code", "name": "Qwen Code", - "version": "0.19.2", + "version": "0.19.3", "description": "Alibaba's Qwen coding assistant", "repository": "https://github.com/QwenLM/qwen-code", "website": "https://qwenlm.github.io/qwen-code-docs/en/users/overview", @@ -1092,7 +1092,7 @@ "license": "Apache-2.0", "distribution": { "npx": { - "package": "@qwen-code/qwen-code@0.19.2", + "package": "@qwen-code/qwen-code@0.19.3", "args": [ "--acp", "--experimental-skills" @@ -1104,7 +1104,7 @@ { "id": "sigit", "name": "siGit Code", - "version": "1.2.1", + "version": "1.2.2", "description": "Local-first coding agent. Runs entirely on your machine with optional on-device LLM inference via Onde.", "repository": "https://github.com/getsigit/sigit", "website": "https://github.com/getsigit/sigit", @@ -1115,32 +1115,32 @@ "distribution": { "binary": { "darwin-aarch64": { - "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.1/sigit-macos-arm64.tar.gz", + "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.2/sigit-macos-arm64.tar.gz", "cmd": "./sigit" }, "darwin-x86_64": { - "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.1/sigit-macos-amd64.tar.gz", + "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.2/sigit-macos-amd64.tar.gz", "cmd": "./sigit" }, "linux-aarch64": { - "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.1/sigit-linux-arm64", + "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.2/sigit-linux-arm64", "cmd": "./sigit-linux-arm64" }, "linux-x86_64": { - "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.1/sigit-linux-amd64", + "archive": 
"https://github.com/getsigit/sigit/releases/download/v1.2.2/sigit-linux-amd64", "cmd": "./sigit-linux-amd64" }, "windows-aarch64": { - "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.1/sigit-win-arm64.exe", + "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.2/sigit-win-arm64.exe", "cmd": "./sigit-win-arm64.exe" }, "windows-x86_64": { - "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.1/sigit-win-amd64.exe", + "archive": "https://github.com/getsigit/sigit/releases/download/v1.2.2/sigit-win-amd64.exe", "cmd": "./sigit-win-amd64.exe" } }, "npx": { - "package": "@smbcloud/sigit@1.2.1" + "package": "@smbcloud/sigit@1.2.2" } }, "icon": "https://cdn.agentclientprotocol.com/registry/v1/latest/sigit.svg" diff --git a/resources/model-db/providers.json b/resources/model-db/providers.json index 3c7191dfe..3d7be7a82 100644 --- a/resources/model-db/providers.json +++ b/resources/model-db/providers.json @@ -4294,8 +4294,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": true, "open_weights": false, @@ -4392,8 +4391,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": true, "open_weights": false, @@ -10059,6 +10057,33 @@ "display_name": "Vercel AI Gateway", "doc": "https://github.com/vercel/ai/tree/5eb85cc45a259553501f535b8ac79a77d0e79223/packages/gateway", "models": [ + { + "id": "xai/grok-imagine-video-1.5", + "name": "Grok Imagine Video 1.5", + "display_name": "Grok Imagine Video 1.5", + "modalities": { + "input": [ + "text" + ], + "output": [ + "video" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "temperature": true, + "tool_call": false, + "reasoning": { + "supported": false + }, + "attachment": false, + "open_weights": false, + "release_date": "2026-06-22", + "last_updated": "2026-06-22", + "type": "chat" + }, { "id": 
"xai/grok-4.1-fast-reasoning", "name": "Grok 4.1 Fast Reasoning", @@ -10086,7 +10111,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-07-09", + "release_date": "2025-11-19", "last_updated": "2025-07-09", "cost": { "input": 0.2, @@ -10222,7 +10247,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-07-09", + "release_date": "2025-11-19", "last_updated": "2025-07-09", "cost": { "input": 0.2, @@ -10375,7 +10400,7 @@ }, "attachment": true, "open_weights": false, - "release_date": "2026-03-09", + "release_date": "2026-03-10", "last_updated": "2026-03-23", "cost": { "input": 1.25, @@ -10471,7 +10496,7 @@ }, "attachment": true, "open_weights": false, - "release_date": "2026-03-09", + "release_date": "2026-03-10", "last_updated": "2026-03-23", "cost": { "input": 1.25, @@ -10506,7 +10531,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-03-09", + "release_date": "2026-03-10", "last_updated": "2026-03-23", "cost": { "input": 1.25, @@ -10605,7 +10630,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-09-05", + "release_date": "2025-07-11", "last_updated": "2025-09-05", "cost": { "input": 0.57, @@ -10667,8 +10692,8 @@ ] }, "limit": { - "context": 262114, - "output": 262114 + "context": 216144, + "output": 216144 }, "temperature": true, "tool_call": true, @@ -10693,9 +10718,9 @@ "release_date": "2025-11-06", "last_updated": "2025-11-06", "cost": { - "input": 0.6, - "output": 2.5, - "cache_read": 0.15 + "input": 0.47, + "output": 2, + "cache_read": 0.141 }, "type": "chat" }, @@ -10752,8 +10777,7 @@ "modalities": { "input": [ "text", - "image", - "pdf" + "image" ], "output": [ "text" @@ -10877,7 +10901,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-12-21", + "release_date": "2025-12-03", "last_updated": "2025-12-21", "type": "chat" }, @@ -11012,7 +11036,7 @@ }, "attachment": false, "open_weights": false, - 
"release_date": "2025-12-21", + "release_date": "2025-12-18", "last_updated": "2025-12-21", "type": "chat" }, @@ -11039,7 +11063,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-12-21", + "release_date": "2025-12-03", "last_updated": "2025-12-21", "type": "chat" }, @@ -11066,7 +11090,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-03-06", + "release_date": "2026-01-15", "last_updated": "2026-03-06", "type": "chat" }, @@ -11093,7 +11117,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-03-01", + "release_date": "2024-04-15", "last_updated": "2024-03", "type": "chat" }, @@ -11120,7 +11144,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-03-06", + "release_date": "2026-01-15", "last_updated": "2026-03-06", "type": "chat" }, @@ -11147,7 +11171,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-09-01", + "release_date": "2024-12-04", "last_updated": "2024-09", "type": "chat" }, @@ -11174,7 +11198,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-03-06", + "release_date": "2026-01-15", "last_updated": "2026-03-06", "type": "chat" }, @@ -11336,7 +11360,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-09-01", + "release_date": "2025-01-07", "last_updated": "2024-09", "type": "chat" }, @@ -11363,7 +11387,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-03-01", + "release_date": "2024-06-03", "last_updated": "2024-03", "type": "chat" }, @@ -11391,7 +11415,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-04", - "release_date": "2024-07-01", + "release_date": "2024-07-18", "last_updated": "2024-07-01", "cost": { "input": 0.15, @@ -11477,7 +11501,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-05-07", + "release_date": "2025-05-21", "last_updated": "2025-05-07", "cost": { "input": 0.1, @@ -11524,7 
+11548,8 @@ "display_name": "Mistral Medium Latest", "modalities": { "input": [ - "text" + "text", + "image" ], "output": [ "text" @@ -11542,7 +11567,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-05-21", + "release_date": "2026-04-29", "last_updated": "2026-05-21", "cost": { "input": 1.5, @@ -11589,7 +11614,8 @@ "display_name": "Devstral Small 2", "modalities": { "input": [ - "text" + "text", + "image" ], "output": [ "text" @@ -11607,7 +11633,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-05-07", + "release_date": "2025-12-09", "last_updated": "2025-05-07", "cost": { "input": 0.1, @@ -11641,7 +11667,7 @@ "attachment": true, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-12-01", + "release_date": "2025-12-02", "last_updated": "2025-12-01", "cost": { "input": 0.2, @@ -11706,7 +11732,7 @@ "attachment": true, "open_weights": true, "knowledge": "2025-06", - "release_date": "2024-09-01", + "release_date": "2024-09-17", "last_updated": "2026-03-16", "cost": { "input": 0.1, @@ -12155,7 +12181,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2025-10-15", "last_updated": "2026-06-08", "type": "chat" }, @@ -12270,7 +12296,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2025-05-20", "last_updated": "2026-06-08", "type": "chat" }, @@ -12297,7 +12323,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2025-07-31", "last_updated": "2026-06-08", "type": "chat" }, @@ -12381,7 +12407,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-01", - "release_date": "2025-03-20", + "release_date": "2025-08-26", "last_updated": "2025-08-26", "cost": { "input": 0.3, @@ -12720,7 +12746,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2025-10-15", "last_updated": 
"2026-06-08", "type": "chat" }, @@ -12833,14 +12859,14 @@ "context": 480, "output": 8192 }, - "temperature": false, + "temperature": true, "tool_call": false, "reasoning": { "supported": false }, "attachment": false, "open_weights": false, - "release_date": "2025-05-22", + "release_date": "2025-05-20", "last_updated": "2025-05-22", "type": "imageGeneration" }, @@ -12999,7 +13025,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2024-08-02", "last_updated": "2026-06-08", "type": "chat" }, @@ -13028,7 +13054,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2024-12-01", + "release_date": "2025-10-29", "last_updated": "2024-12-01", "cost": { "input": 0.075, @@ -13061,7 +13087,7 @@ "attachment": false, "open_weights": false, "knowledge": "2021-09", - "release_date": "2023-09-28", + "release_date": "2023-09-18", "last_updated": "2023-03-01", "cost": { "input": 1.5, @@ -13090,8 +13116,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": true, "open_weights": false, @@ -13280,7 +13305,7 @@ "attachment": false, "open_weights": false, "knowledge": "2021-09", - "release_date": "2023-05-28", + "release_date": "2023-03-01", "last_updated": "2023-11-06", "cost": { "input": 0.5, @@ -13299,8 +13324,7 @@ "pdf" ], "output": [ - "text", - "image" + "text" ] }, "limit": { @@ -13331,7 +13355,7 @@ "attachment": true, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-08-07", + "release_date": "2025-10-06", "last_updated": "2025-10-06", "cost": { "input": 15, @@ -13380,7 +13404,7 @@ "attachment": true, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-04-16", + "release_date": "2025-06-10", "last_updated": "2025-06-10", "cost": { "input": 20, @@ -13498,8 +13522,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + 
"supported": false }, "attachment": true, "open_weights": false, @@ -13848,7 +13871,7 @@ "attachment": true, "open_weights": false, "knowledge": "2025-08-31", - "release_date": "2026-02-24", + "release_date": "2026-02-05", "last_updated": "2026-02-05", "cost": { "input": 1.75, @@ -14021,7 +14044,7 @@ }, "limit": { "context": 131072, - "output": 131000 + "output": 131072 }, "temperature": true, "tool_call": true, @@ -14040,9 +14063,8 @@ "release_date": "2025-08-05", "last_updated": "2025-08-05", "cost": { - "input": 0.35, - "output": 0.75, - "cache_read": 0.25 + "input": 0.1, + "output": 0.5 }, "type": "chat" }, @@ -14318,7 +14340,7 @@ "attachment": true, "open_weights": false, "knowledge": "2024-10", - "release_date": "2024-06-26", + "release_date": "2025-06-26", "last_updated": "2024-06-26", "cost": { "input": 10, @@ -15402,8 +15424,8 @@ ] }, "limit": { - "context": 131000, - "output": 40000 + "context": 200000, + "output": 120000 }, "temperature": true, "tool_call": true, @@ -15428,9 +15450,9 @@ "release_date": "2025-12-22", "last_updated": "2025-12-22", "cost": { - "input": 2.25, - "output": 2.75, - "cache_read": 2.25 + "input": 0.6, + "output": 2.2, + "cache_read": 0.12 }, "type": "chat" }, @@ -15540,7 +15562,7 @@ "attachment": false, "open_weights": true, "knowledge": "2025-01", - "release_date": "2025-01-01", + "release_date": "2026-01-19", "last_updated": "2026-01-19", "cost": { "input": 0.06, @@ -15555,17 +15577,15 @@ "display_name": "GLM 5.1", "modalities": { "input": [ - "text", - "image", - "pdf" + "text" ], "output": [ "text" ] }, "limit": { - "context": 202800, - "output": 64000 + "context": 202000, + "output": 202000 }, "temperature": true, "tool_call": true, @@ -15583,8 +15603,8 @@ "release_date": "2026-04-07", "last_updated": "2026-04-07", "cost": { - "input": 1.4, - "output": 4.4, + "input": 1.3, + "output": 4.3, "cache_read": 0.26 }, "type": "chat" @@ -15703,6 +15723,39 @@ }, "type": "chat" }, + { + "id": "zai/glm-5.2-fast", + "name": "GLM 5.2 
Fast", + "display_name": "GLM 5.2 Fast", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1000000, + "output": 128000 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": false, + "open_weights": false, + "release_date": "2026-06-16", + "last_updated": "2026-06-16", + "cost": { + "input": 3, + "output": 10.25, + "cache_read": 0.5 + }, + "type": "chat" + }, { "id": "zai/glm-5v-turbo", "name": "GLM 5V Turbo", @@ -15833,7 +15886,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2026-03-13", + "release_date": "2026-01-19", "last_updated": "2026-01-19", "cost": { "input": 0.07, @@ -15879,8 +15932,8 @@ "release_date": "2026-02-12", "last_updated": "2026-02-12", "cost": { - "input": 1, - "output": 3.2, + "input": 0.95, + "output": 3.15, "cache_read": 0.2 }, "type": "chat" @@ -15946,7 +15999,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-01-28", + "release_date": "2026-02-13", "last_updated": "2026-01-28", "type": "chat" }, @@ -16011,7 +16064,8 @@ "display_name": "Seed 1.6", "modalities": { "input": [ - "text" + "text", + "image" ], "output": [ "text" @@ -16117,7 +16171,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-10-31", + "release_date": "2025-10-24", "last_updated": "2025-10-31", "type": "chat" }, @@ -16144,7 +16198,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-08-28", + "release_date": "2025-09-09", "last_updated": "2025-08-28", "type": "chat" }, @@ -16171,7 +16225,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-11-28", + "release_date": "2025-12-03", "last_updated": "2025-11-28", "type": "chat" }, @@ -16364,7 +16418,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2024-12-01", + "release_date": "2025-12-15", "last_updated": "2025-12-15", 
"cost": { "input": 0.05, @@ -16429,7 +16483,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-03-18", + "release_date": "2026-03-11", "last_updated": "2026-03-11", "cost": { "input": 0.15, @@ -16463,7 +16517,7 @@ "attachment": true, "open_weights": false, "knowledge": "2024-10", - "release_date": "2024-12-01", + "release_date": "2025-10-28", "last_updated": "2025-10-28", "cost": { "input": 0.2, @@ -16532,7 +16586,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-12-17", + "release_date": "2025-12-16", "last_updated": "2026-02-04", "cost": { "input": 0.1, @@ -16582,7 +16636,6 @@ "modalities": { "input": [ "text", - "image", "pdf" ], "output": [ @@ -16611,6 +16664,33 @@ }, "type": "chat" }, + { + "id": "quiverai/arrow-1.1", + "name": "Arrow 1.1", + "display_name": "Arrow 1.1", + "modalities": { + "input": [ + "text" + ], + "output": [ + "image" + ] + }, + "limit": { + "context": 131072, + "output": 131072 + }, + "temperature": true, + "tool_call": false, + "reasoning": { + "supported": false + }, + "attachment": false, + "open_weights": false, + "release_date": "2026-04-16", + "last_updated": "2026-04-16", + "type": "chat" + }, { "id": "inception/mercury-coder-small", "name": "Mercury Coder Small Beta", @@ -16675,6 +16755,42 @@ }, "type": "chat" }, + { + "id": "anthropic/claude-3.5-haiku", + "name": "Claude 3.5 Haiku", + "display_name": "Claude 3.5 Haiku", + "modalities": { + "input": [ + "text", + "image", + "pdf" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 200000, + "output": 8192 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": false + }, + "attachment": true, + "open_weights": false, + "knowledge": "2024-07-31", + "release_date": "2024-11-04", + "last_updated": "2024-10-22", + "cost": { + "input": 0.8, + "output": 4, + "cache_read": 0.08, + "cache_write": 1 + }, + "type": "chat" + }, { "id": "anthropic/claude-haiku-4.5", "name": "Claude 
Haiku 4.5", @@ -16912,7 +17028,7 @@ "attachment": true, "open_weights": false, "knowledge": "2025-03-31", - "release_date": "2024-11-24", + "release_date": "2025-11-24", "last_updated": "2025-11-24", "cost": { "input": 5, @@ -17338,42 +17454,6 @@ }, "type": "chat" }, - { - "id": "anthropic/claude-3.5-haiku", - "name": "Claude Haiku 3.5", - "display_name": "Claude Haiku 3.5", - "modalities": { - "input": [ - "text", - "image", - "pdf" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 200000, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "knowledge": "2024-07-31", - "release_date": "2024-10-22", - "last_updated": "2024-10-22", - "cost": { - "input": 0.8, - "output": 4, - "cache_read": 0.08, - "cache_write": 1 - }, - "type": "chat" - }, { "id": "cohere/rerank-v3.5", "name": "Cohere Rerank 3.5", @@ -17589,7 +17669,9 @@ "display_name": "Interfaze Beta", "modalities": { "input": [ - "text" + "text", + "image", + "pdf" ], "output": [ "text" @@ -17638,7 +17720,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-06-01", + "release_date": "2025-05-29", "last_updated": "2025-06", "type": "chat" }, @@ -17665,7 +17747,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2025-11-25", "last_updated": "2026-06-08", "type": "imageGeneration" }, @@ -17719,7 +17801,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2025-12-16", "last_updated": "2026-06-08", "type": "chat" }, @@ -17746,7 +17828,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-10-01", + "release_date": "2024-10-02", "last_updated": "2024-10", "type": "chat" }, @@ -17800,7 +17882,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2026-01-15", "last_updated": "2026-06-08", "type": "chat" }, @@ 
-17827,7 +17909,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2026-01-15", "last_updated": "2026-06-08", "type": "chat" }, @@ -17854,7 +17936,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-06-01", + "release_date": "2025-05-29", "last_updated": "2025-06", "type": "chat" }, @@ -17881,7 +17963,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-06-08", + "release_date": "2025-11-25", "last_updated": "2026-06-08", "type": "imageGeneration" }, @@ -18016,7 +18098,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-03-01", + "release_date": "2024-03-13", "last_updated": "2024-03", "type": "chat" }, @@ -18043,7 +18125,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-10-01", + "release_date": "2024-10-30", "last_updated": "2024-10", "type": "chat" }, @@ -18125,7 +18207,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-01-01", + "release_date": "2026-01-27", "last_updated": "2025-01", "cost": { "input": 0.25, @@ -18307,7 +18389,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2024-04-01", + "release_date": "2024-04-30", "last_updated": "2024-04", "type": "chat" }, @@ -18318,7 +18400,8 @@ "modalities": { "input": [ "text", - "image" + "image", + "pdf" ], "output": [ "text" @@ -18337,7 +18420,7 @@ "attachment": true, "open_weights": false, "knowledge": "2024-10", - "release_date": "2024-12-01", + "release_date": "2025-12-02", "last_updated": "2024-12-01", "cost": { "input": 0.3, @@ -18487,7 +18570,7 @@ "attachment": true, "open_weights": true, "knowledge": "2025-09", - "release_date": "2025-09-24", + "release_date": "2025-09-23", "last_updated": "2025-09-24", "cost": { "input": 0.4, @@ -18617,7 +18700,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2025-09-23", + "release_date": "2025-09-05", 
"last_updated": "2025-09-23", "cost": { "input": 1.2, @@ -18677,7 +18760,7 @@ "attachment": false, "open_weights": true, "knowledge": "2025-04", - "release_date": "2025-09-12", + "release_date": "2025-09-11", "last_updated": "2025-09", "cost": { "input": 0.15, @@ -18717,7 +18800,7 @@ "attachment": true, "open_weights": false, "knowledge": "2025-04", - "release_date": "2026-06-01", + "release_date": "2026-06-02", "last_updated": "2026-06-02", "cost": { "input": 0.4, @@ -18734,8 +18817,7 @@ "modalities": { "input": [ "text", - "image", - "pdf" + "image" ], "output": [ "text" @@ -18753,7 +18835,7 @@ "attachment": true, "open_weights": true, "knowledge": "2025-04", - "release_date": "2025-09-24", + "release_date": "2025-09-23", "last_updated": "2025-09-24", "cost": { "input": 0.4, @@ -18767,8 +18849,7 @@ "display_name": "Qwen 3.7 Max", "modalities": { "input": [ - "text", - "pdf" + "text" ], "output": [ "text" @@ -18908,7 +18989,7 @@ "attachment": false, "open_weights": true, "knowledge": "2025-09", - "release_date": "2025-09-12", + "release_date": "2025-09-11", "last_updated": "2025-09", "cost": { "input": 0.15, @@ -19015,7 +19096,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2025-04-01", + "release_date": "2025-07-22", "last_updated": "2025-04", "cost": { "input": 1.5, @@ -19049,7 +19130,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2025-04-01", + "release_date": "2025-04-28", "last_updated": "2025-04", "cost": { "input": 0.22, @@ -19168,7 +19249,7 @@ "attachment": false, "open_weights": true, "knowledge": "2025-01", - "release_date": "2025-01", + "release_date": "2026-01-23", "last_updated": "2025-01", "cost": { "input": 1.2, @@ -19360,7 +19441,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2025-04-01", + "release_date": "2025-04-28", "last_updated": "2025-04", "cost": { "input": 0.12, @@ -19406,7 +19487,7 @@ "attachment": true, 
"open_weights": false, "knowledge": "2025-04", - "release_date": "2025-09-24", + "release_date": "2025-09-23", "last_updated": "2025-04", "cost": { "input": 0.4, @@ -19421,8 +19502,7 @@ "modalities": { "input": [ "text", - "image", - "pdf" + "image" ], "output": [ "text" @@ -19439,7 +19519,7 @@ }, "attachment": true, "open_weights": false, - "release_date": "2025-09-24", + "release_date": "2025-09-23", "last_updated": "2026-05-01", "cost": { "input": 0.4, @@ -19472,7 +19552,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2025-04-01", + "release_date": "2025-04-28", "last_updated": "2025-04", "cost": { "input": 0.12, @@ -19505,7 +19585,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2025-04-01", + "release_date": "2025-04-28", "last_updated": "2025-04", "cost": { "input": 0.16, @@ -19519,8 +19599,7 @@ "display_name": "Qwen 3.6 Max Preview", "modalities": { "input": [ - "text", - "pdf" + "text" ], "output": [ "text" @@ -19573,7 +19652,7 @@ "attachment": false, "open_weights": false, "knowledge": "2025-04", - "release_date": "2025-04-01", + "release_date": "2025-07-31", "last_updated": "2025-04", "cost": { "input": 0.15, @@ -19653,7 +19732,7 @@ }, "attachment": false, "open_weights": false, - "release_date": "2026-03-13", + "release_date": "2026-01-15", "last_updated": "2026-03-13", "type": "chat" }, @@ -19681,7 +19760,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-08-30", + "release_date": "2025-09-01", "last_updated": "2025-08-30", "type": "chat" }, @@ -19709,7 +19788,7 @@ "attachment": false, "open_weights": false, "knowledge": "2023-12", - "release_date": "2024-09-18", + "release_date": "2024-09-25", "last_updated": "2024-09-18", "cost": { "input": 0.1, @@ -19871,7 +19950,7 @@ "attachment": false, "open_weights": false, "knowledge": "2023-12", - "release_date": "2024-09-18", + "release_date": "2024-09-25", "last_updated": 
"2024-09-18", "cost": { "input": 0.15, @@ -19983,9 +20062,7 @@ "display_name": "DeepSeek V4 Flash", "modalities": { "input": [ - "text", - "image", - "pdf" + "text" ], "output": [ "text" @@ -20097,8 +20174,7 @@ "display_name": "DeepSeek V4 Pro", "modalities": { "input": [ - "text", - "pdf" + "text" ], "output": [ "text" @@ -20143,9 +20219,7 @@ "display_name": "DeepSeek V3.2 Thinking", "modalities": { "input": [ - "text", - "image", - "pdf" + "text" ], "output": [ "text" @@ -20191,8 +20265,8 @@ ] }, "limit": { - "context": 163840, - "output": 8192 + "context": 128000, + "output": 128000 }, "temperature": true, "tool_call": true, @@ -20206,9 +20280,8 @@ "release_date": "2025-08-21", "last_updated": "2025-08-21", "cost": { - "input": 0.56, - "output": 1.68, - "cache_read": 0.28 + "input": 0.6, + "output": 1.7 }, "type": "chat" }, @@ -20218,9 +20291,7 @@ "display_name": "DeepSeek V3.2", "modalities": { "input": [ - "text", - "image", - "pdf" + "text" ], "output": [ "text" @@ -20233,8 +20304,7 @@ "temperature": true, "tool_call": false, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -20412,7 +20482,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-10-27", + "release_date": "2025-12-23", "last_updated": "2025-12-23", "cost": { "input": 0.3, @@ -20612,7 +20682,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-10-27", + "release_date": "2025-12-23", "last_updated": "2025-10-27", "cost": { "input": 0.3, @@ -20647,7 +20717,7 @@ "attachment": false, "open_weights": false, "knowledge": "2024-10", - "release_date": "2025-10-24", + "release_date": "2025-11-09", "last_updated": "2025-10-24", "cost": { "input": 0.3, @@ -22101,6 +22171,43 @@ }, "type": "chat" }, + { + "id": "openai/gpt-oss-120b", + "name": "GPT OSS 120B", + "display_name": "GPT OSS 120B", + "modalities": { + "input": [ + "text" + ], + "output": [ + 
"text" + ] + }, + "limit": { + "context": 131072, + "output": 32768 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2025-08-05", + "last_updated": "2025-08-05", + "cost": { + "input": 0.25, + "output": 0.69 + }, + "type": "chat" + }, { "id": "XiaomiMiMo/MiMo-V2.5-Pro", "name": "MiMo-V2.5-Pro", @@ -23162,7 +23269,7 @@ ] }, "limit": { - "context": 512000, + "context": 1000000, "output": 128000 }, "temperature": true, @@ -23179,7 +23286,7 @@ "attachment": true, "open_weights": true, "release_date": "2026-06-01", - "last_updated": "2026-06-01", + "last_updated": "2026-06-25", "cost": { "input": 0, "output": 0, @@ -29131,6 +29238,50 @@ }, "type": "chat" }, + { + "id": "accounts/fireworks/routers/glm-5p2-fast", + "name": "GLM 5.2 Fast", + "display_name": "GLM 5.2 Fast", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1048575, + "output": 131072 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-26", + "last_updated": "2026-06-26", + "cost": { + "input": 2.1, + "output": 6.6, + "cache_read": 0.21 + }, + "type": "chat" + }, { "id": "accounts/fireworks/routers/kimi-k2p7-code-fast", "name": "Kimi K2.7 Code Fast", @@ -29579,7 +29730,7 @@ ] }, "limit": { - "context": 1048576, + "context": 1048575, "output": 131072 }, "temperature": true, @@ -36070,7 +36221,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": 
{ @@ -37972,6 +38124,49 @@ }, "type": "chat" }, + { + "id": "zai-org/GLM-5.2", + "name": "GLM-5.2", + "display_name": "GLM-5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 432000, + "output": 432000 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-13", + "last_updated": "2026-06-13", + "cost": { + "input": 1.4, + "output": 4.4 + }, + "type": "chat" + }, { "id": "deepseek-ai/DeepSeek-V3.2-fast", "name": "DeepSeek-V3.2-fast", @@ -39828,6 +40023,249 @@ } ] }, + "tinfoil": { + "id": "tinfoil", + "name": "Tinfoil", + "display_name": "Tinfoil", + "api": "https://inference.tinfoil.sh/v1", + "doc": "https://docs.tinfoil.sh", + "models": [ + { + "id": "kimi-k2-6", + "name": "Kimi K2.6", + "display_name": "Kimi K2.6", + "modalities": { + "input": [ + "text", + "image", + "video" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 256000, + "output": 262144 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": true, + "open_weights": true, + "knowledge": "2025-01", + "release_date": "2026-04-21", + "last_updated": "2026-04-21", + "cost": { + "input": 1.5, + "output": 5.25 + }, + "type": "chat" + }, + { + "id": "llama3-3-70b", + "name": "Llama-3.3-70B-Instruct", + "display_name": "Llama-3.3-70B-Instruct", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 128000, + "output": 4096 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": false + }, + "attachment": true, + "open_weights": true, + "knowledge": "2023-12", + "release_date": 
"2024-12-06", + "last_updated": "2024-12-06", + "cost": { + "input": 1.75, + "output": 2.75 + }, + "type": "chat" + }, + { + "id": "gpt-oss-safeguard-120b", + "name": "gpt-oss-safeguard-120b", + "display_name": "gpt-oss-safeguard-120b", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131000, + "output": 32768 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": false, + "open_weights": true, + "knowledge": "2024-06", + "release_date": "2025-10-29", + "last_updated": "2025-10-29", + "cost": { + "input": 0.15, + "output": 0.6 + }, + "type": "chat" + }, + { + "id": "nomic-embed-text", + "name": "Nomic Embed Text v1.5", + "display_name": "Nomic Embed Text v1.5", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 8192, + "output": 768 + }, + "temperature": false, + "tool_call": false, + "reasoning": { + "supported": false + }, + "attachment": false, + "open_weights": true, + "release_date": "2024-02", + "last_updated": "2024-02", + "cost": { + "input": 0.05, + "output": 0 + }, + "type": "chat" + }, + { + "id": "gpt-oss-120b", + "name": "gpt-oss-120b", + "display_name": "gpt-oss-120b", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131000, + "output": 32768 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": false, + "open_weights": true, + "knowledge": "2024-06", + "release_date": "2025-08-05", + "last_updated": "2025-08-05", + "cost": { + "input": 0.15, + "output": 0.6 + }, + "type": "chat" + }, + { + "id": "glm-5-2", + "name": "GLM-5.2", + "display_name": "GLM-5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 384000, + "output": 131072 
+ }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-13", + "last_updated": "2026-06-13", + "cost": { + "input": 1.5, + "output": 5.25 + }, + "type": "chat" + }, + { + "id": "gemma4-31b", + "name": "Gemma 4 31B IT", + "display_name": "Gemma 4 31B IT", + "modalities": { + "input": [ + "text", + "image" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 256000, + "output": 32768 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": true, + "open_weights": true, + "release_date": "2026-04-02", + "last_updated": "2026-04-02", + "cost": { + "input": 0.4, + "output": 1 + }, + "type": "chat" + } + ] + }, "mistral": { "id": "mistral", "name": "Mistral", @@ -43850,7 +44288,7 @@ ] }, "limit": { - "context": 512000, + "context": 1000000, "output": 131072 }, "temperature": true, @@ -47544,8 +47982,8 @@ "release_date": "2026-04-24", "last_updated": "2026-06-11", "cost": { - "input": 0.17, - "output": 0.35, + "input": 0.138, + "output": 0.275, "cache_read": 0.028 }, "type": "chat" @@ -47625,9 +48063,9 @@ "release_date": "2026-04-20", "last_updated": "2026-06-11", "cost": { - "input": 0.85, - "output": 4.655, - "cache_read": 0.22 + "input": 0.75, + "output": 3.5, + "cache_read": 0.16 }, "type": "chat" }, @@ -48120,9 +48558,9 @@ "release_date": "2024-04-01", "last_updated": "2026-06-11", "cost": { - "input": 0.85, - "output": 2.75, - "cache_read": 0.3 + "input": 0.43, + "output": 1.75, + "cache_read": 0.08 }, "type": "chat" }, @@ -48371,8 +48809,8 @@ "release_date": "2026-04-24", "last_updated": "2026-06-11", "cost": { - "input": 1.73, - "output": 3.796, + "input": 1.65, + "output": 3.301, "cache_read": 0.33 }, "type": "chat" @@ -49082,7 +49520,7 @@ ] }, "limit": { - "context": 256000, + "context": 128000, "output": 16384 }, "tool_call": true, @@ -49294,6 +49732,7 @@ 
"context": 128000, "output": 16384 }, + "temperature": true, "tool_call": true, "reasoning": { "supported": true, @@ -49703,77 +50142,15 @@ }, "type": "chat" }, - { - "id": "claude-opus-4-6-fast", - "name": "Claude Opus 4.6 Fast", - "display_name": "Claude Opus 4.6 Fast", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 1000000, - "output": 128000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": false - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "default_enabled": false, - "mode": "mixed", - "budget": { - "min": 1024, - "unit": "tokens" - }, - "effort": "high", - "effort_options": [ - "low", - "medium", - "high", - "max" - ], - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ], - "notes": [ - "Anthropic recommends adaptive thinking with effort for Claude 4.6; budget_tokens remains a deprecated compatibility path.", - "Anthropic API defaults effort to high; lower effort levels should be chosen per workload." 
- ] - } - }, - "attachment": true, - "open_weights": false, - "knowledge": "2025-05-31", - "release_date": "2026-04-08", - "last_updated": "2026-06-11", - "cost": { - "input": 36, - "output": 180, - "cache_read": 3.6, - "cache_write": 45 - }, - "type": "chat" - }, { "id": "qwen-3-7-max", "name": "Qwen 3.7 Max", "display_name": "Qwen 3.7 Max", "modalities": { "input": [ - "text" + "text", + "image", + "video" ], "output": [ "text" @@ -49789,7 +50166,7 @@ "supported": true, "default": true }, - "attachment": false, + "attachment": true, "open_weights": false, "release_date": "2026-05-22", "last_updated": "2026-06-11", @@ -53229,8 +53606,8 @@ "release_date": "2025-08-05", "last_updated": "2025-08-05", "cost": { - "input": 0.44, - "output": 0.99 + "input": 0.22, + "output": 0.83 }, "type": "chat" }, @@ -53344,6 +53721,43 @@ "output": 2.75 }, "type": "chat" + }, + { + "id": "zai-org/GLM-5.2", + "name": "GLM-5.2", + "display_name": "GLM-5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 524288, + "output": 32768 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-13", + "last_updated": "2026-06-13", + "cost": { + "input": 1.54, + "output": 4.84 + }, + "type": "chat" } ] }, @@ -56056,14 +56470,60 @@ "last_updated": "2026-01-27", "cost": { "input": 0.52, - "output": 2.59 + "output": 2.59, + "cache_read": 0.13 }, "type": "chat" }, { - "id": "glm-5-fast", - "name": "GLM 5 Fast", - "display_name": "GLM 5 Fast", + "id": "kimi-k2.6-flex", + "name": "Kimi K2.6 Flex", + "display_name": "Kimi K2.6 Flex", + "modalities": { + "input": [ + "text", + "image" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 262128, + "output": 262128 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": 
true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": true, + "open_weights": true, + "release_date": "2026-04-21", + "last_updated": "2026-04-21", + "cost": { + "input": 0.345, + "output": 1.61, + "cache_read": 0.08625 + }, + "type": "chat" + }, + { + "id": "glm-5.2-short-fast-flex", + "name": "GLM 5.2 Short Fast Flex", + "display_name": "GLM 5.2 Short Fast Flex", "modalities": { "input": [ "text" @@ -56073,8 +56533,8 @@ ] }, "limit": { - "context": 202736, - "output": 202736 + "context": 199984, + "output": 199984 }, "temperature": true, "tool_call": true, @@ -56083,11 +56543,45 @@ }, "attachment": false, "open_weights": true, - "release_date": "2026-04-07", - "last_updated": "2026-04-07", + "release_date": "2026-06-17", + "last_updated": "2026-06-17", "cost": { - "input": 1.1, - "output": 3.6 + "input": 0.725, + "output": 2.25, + "cache_read": 0.18125 + }, + "type": "chat" + }, + { + "id": "glm-5.2-flex", + "name": "GLM 5.2 Flex", + "display_name": "GLM 5.2 Flex", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1048560, + "output": 1048560 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-17", + "last_updated": "2026-06-17", + "cost": { + "input": 0.725, + "output": 2.25, + "cache_read": 0.18125 }, "type": "chat" }, @@ -56124,14 +56618,15 @@ "last_updated": "2026-06-17", "cost": { "input": 1.45, - "output": 4.5 + "output": 4.5, + "cache_read": 0.3625 }, "type": "chat" }, { - "id": "qwen3.5-397b-fast", - "name": "Qwen3.5 397B Fast", - "display_name": "Qwen3.5 397B Fast", + "id": "glm-5.2-short-fast", + "name": "GLM 5.2 Short Fast", + "display_name": "GLM 5.2 Short Fast", "modalities": { "input": [ 
"text" @@ -56141,39 +56636,29 @@ ] }, "limit": { - "context": 262128, - "output": 262128 + "context": 199984, + "output": 199984 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, "attachment": false, "open_weights": true, - "release_date": "2026-02-01", - "last_updated": "2026-02-01", + "release_date": "2026-06-17", + "last_updated": "2026-06-17", "cost": { - "input": 0.69, - "output": 4.14 + "input": 1.45, + "output": 4.5, + "cache_read": 0.3625 }, "type": "chat" }, { - "id": "glm-5.1-fast", - "name": "GLM 5.1 Fast", - "display_name": "GLM 5.1 Fast", + "id": "qwen3.5-397b-fast", + "name": "Qwen3.5 397B Fast", + "display_name": "Qwen3.5 397B Fast", "modalities": { "input": [ "text" @@ -56183,21 +56668,33 @@ ] }, "limit": { - "context": 202736, - "output": 202736 + "context": 262128, + "output": 262128 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "attachment": false, "open_weights": true, - "release_date": "2026-04-07", - "last_updated": "2026-04-07", + "release_date": "2026-02-01", + "last_updated": "2026-02-01", "cost": { - "input": 1.1, - "output": 3.6 + "input": 0.69, + "output": 4.14, + "cache_read": 0.1725 }, "type": "chat" }, @@ -56229,7 +56726,8 @@ "last_updated": "2026-04-21", "cost": { "input": 0.69, - "output": 3.22 + "output": 3.22, + "cache_read": 0.1725 }, "type": "chat" }, @@ -56272,7 +56770,141 @@ "last_updated": "2026-04-01", "cost": { "input": 0.29, - "output": 1.15 + "output": 1.15, + "cache_read": 0.0725 + }, + "type": "chat" + }, + { + "id": "glm-5.2-short-flex", + 
"name": "GLM 5.2 Short Flex", + "display_name": "GLM 5.2 Short Flex", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 199984, + "output": 199984 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-17", + "last_updated": "2026-06-17", + "cost": { + "input": 0.725, + "output": 2.25, + "cache_read": 0.18125 + }, + "type": "chat" + }, + { + "id": "glm-5.2-fast", + "name": "GLM 5.2 Fast", + "display_name": "GLM 5.2 Fast", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1048560, + "output": 1048560 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": false + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-17", + "last_updated": "2026-06-17", + "cost": { + "input": 1.45, + "output": 4.5, + "cache_read": 0.3625 + }, + "type": "chat" + }, + { + "id": "glm-5.2-short", + "name": "GLM 5.2 Short", + "display_name": "GLM 5.2 Short", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 199984, + "output": 199984 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-17", + "last_updated": "2026-06-17", + "cost": { + "input": 1.45, + "output": 4.5, + "cache_read": 0.3625 + }, + "type": "chat" + }, + { + "id": "kimi-k2.7-code-flex", + "name": "Kimi K2.7 Code Flex", + "display_name": "Kimi K2.7 Code Flex", + "modalities": { + "input": [ + "text", + "image" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 262144, + "output": 262144 + }, + "temperature": false, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": true, + "open_weights": true, + "knowledge": 
"2025-01", + "release_date": "2026-06-12", + "last_updated": "2026-06-12", + "cost": { + "input": 0.475, + "output": 2, + "cache_read": 0.11875 }, "type": "chat" }, @@ -56316,7 +56948,8 @@ "last_updated": "2026-04-21", "cost": { "input": 0.69, - "output": 3.22 + "output": 3.22, + "cache_read": 0.1725 }, "type": "chat" }, @@ -56360,7 +56993,8 @@ "last_updated": "2026-01-27", "cost": { "input": 0.52, - "output": 2.59 + "output": 2.59, + "cache_read": 0.13 }, "type": "chat" }, @@ -56399,7 +57033,8 @@ "last_updated": "2026-06-12", "cost": { "input": 0.95, - "output": 4 + "output": 4, + "cache_read": 0.2375 }, "type": "chat" }, @@ -56443,7 +57078,8 @@ "last_updated": "2026-04-01", "cost": { "input": 0.29, - "output": 1.15 + "output": 1.15, + "cache_read": 0.0725 }, "type": "chat" }, @@ -56486,50 +57122,8 @@ "last_updated": "2026-02-01", "cost": { "input": 0.69, - "output": 4.14 - }, - "type": "chat" - }, - { - "id": "zai-org/GLM-5.1-FP8", - "name": "GLM 5.1 FP8", - "display_name": "GLM 5.1 FP8", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 202736, - "output": 202736 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "release_date": "2026-04-07", - "last_updated": "2026-04-07", - "cost": { - "input": 1.1, - "output": 3.6 + "output": 4.14, + "cache_read": 0.1725 }, "type": "chat" } @@ -56542,80 +57136,6 @@ "api": "https://api.siliconflow.cn/v1", "doc": "https://cloud.siliconflow.com/models", "models": [ - { - "id": "moonshotai/Kimi-K2-Instruct-0905", - "name": "moonshotai/Kimi-K2-Instruct-0905", - "display_name": "moonshotai/Kimi-K2-Instruct-0905", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, 
- "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-09-08", - "last_updated": "2025-11-25", - "cost": { - "input": 0.4, - "output": 2 - }, - "type": "chat" - }, - { - "id": "moonshotai/Kimi-K2-Thinking", - "name": "moonshotai/Kimi-K2-Thinking", - "display_name": "moonshotai/Kimi-K2-Thinking", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262000, - "output": 262000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-11-07", - "last_updated": "2025-11-25", - "cost": { - "input": 0.55, - "output": 2.5 - }, - "type": "chat" - }, { "id": "baidu/ERNIE-4.5-300B-A47B", "name": "baidu/ERNIE-4.5-300B-A47B", @@ -56746,80 +57266,6 @@ }, "type": "chat" }, - { - "id": "Pro/moonshotai/Kimi-K2-Thinking", - "name": "Pro/moonshotai/Kimi-K2-Thinking", - "display_name": "Pro/moonshotai/Kimi-K2-Thinking", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262000, - "output": 262000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-11-07", - "last_updated": "2025-11-25", - "cost": { - "input": 0.55, - "output": 2.5 - }, - "type": "chat" - }, - { - "id": "Pro/moonshotai/Kimi-K2-Instruct-0905", - 
"name": "Pro/moonshotai/Kimi-K2-Instruct-0905", - "display_name": "Pro/moonshotai/Kimi-K2-Instruct-0905", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-09-08", - "last_updated": "2025-11-25", - "cost": { - "input": 0.4, - "output": 2 - }, - "type": "chat" - }, { "id": "Pro/moonshotai/Kimi-K2.6", "name": "Pro/moonshotai/Kimi-K2.6", @@ -56952,49 +57398,6 @@ }, "type": "chat" }, - { - "id": "Pro/zai-org/GLM-4.7", - "name": "Pro/zai-org/GLM-4.7", - "display_name": "Pro/zai-org/GLM-4.7", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 205000, - "output": 205000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-12-22", - "last_updated": "2025-12-22", - "cost": { - "input": 0.6, - "output": 2.2 - }, - "type": "chat" - }, { "id": "Pro/zai-org/GLM-5.1", "name": "Pro/zai-org/GLM-5.1", @@ -57182,42 +57585,6 @@ }, "type": "chat" }, - { - "id": "Pro/MiniMaxAI/MiniMax-M2.1", - "name": "Pro/MiniMaxAI/MiniMax-M2.1", - "display_name": "Pro/MiniMaxAI/MiniMax-M2.1", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 197000, - "output": 131000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-12-23", - "last_updated": "2025-12-23", - "cost": { 
- "input": 0.3, - "output": 1.2 - }, - "type": "chat" - }, { "id": "Pro/MiniMaxAI/MiniMax-M2.5", "name": "Pro/MiniMaxAI/MiniMax-M2.5", @@ -57281,8 +57648,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -57352,6 +57718,49 @@ }, "type": "chat" }, + { + "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "name": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "display_name": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": false, + "release_date": "2025-07-28", + "last_updated": "2025-11-25", + "cost": { + "input": 0.13, + "output": 0.6 + }, + "type": "chat" + }, { "id": "Qwen/Qwen3.5-122B-A10B", "name": "Qwen/Qwen3.5-122B-A10B", @@ -57445,28 +57854,25 @@ "type": "chat" }, { - "id": "Qwen/Qwen3.5-4B", - "name": "Qwen/Qwen3.5-4B", - "display_name": "Qwen/Qwen3.5-4B", + "id": "Qwen/Qwen3-8B", + "name": "Qwen/Qwen3-8B", + "display_name": "Qwen/Qwen3-8B", "modalities": { "input": [ - "text", - "image", - "video" + "text" ], "output": [ "text" ] }, "limit": { - "context": 262144, - "output": 65536 + "context": 131072, + "output": 8192 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -57480,20 +57886,19 @@ } }, "attachment": false, - "open_weights": true, - "knowledge": "2025-04", - "release_date": "2026-03-03", - "last_updated": "2026-03-03", + "open_weights": false, + "release_date": "2025-04-30", + 
"last_updated": "2025-11-25", "cost": { - "input": 0, - "output": 0 + "input": 0.06, + "output": 0.06 }, "type": "chat" }, { - "id": "Qwen/Qwen3.5-9B", - "name": "Qwen/Qwen3.5-9B", - "display_name": "Qwen/Qwen3.5-9B", + "id": "Qwen/Qwen3.5-4B", + "name": "Qwen/Qwen3.5-4B", + "display_name": "Qwen/Qwen3.5-4B", "modalities": { "input": [ "text", @@ -57531,15 +57936,15 @@ "release_date": "2026-03-03", "last_updated": "2026-03-03", "cost": { - "input": 0.22, - "output": 1.74 + "input": 0, + "output": 0 }, "type": "chat" }, { - "id": "Qwen/Qwen3.5-35B-A3B", - "name": "Qwen/Qwen3.5-35B-A3B", - "display_name": "Qwen/Qwen3.5-35B-A3B", + "id": "Qwen/Qwen3.5-9B", + "name": "Qwen/Qwen3.5-9B", + "display_name": "Qwen/Qwen3.5-9B", "modalities": { "input": [ "text", @@ -57574,36 +57979,34 @@ "attachment": false, "open_weights": true, "knowledge": "2025-04", - "release_date": "2026-02-25", - "last_updated": "2026-02-25", + "release_date": "2026-03-03", + "last_updated": "2026-03-03", "cost": { - "input": 0.23, - "output": 1.86 + "input": 0.22, + "output": 1.74 }, "type": "chat" }, { - "id": "Qwen/Qwen3-VL-32B-Thinking", - "name": "Qwen/Qwen3-VL-32B-Thinking", - "display_name": "Qwen/Qwen3-VL-32B-Thinking", + "id": "Qwen/Qwen3-32B", + "name": "Qwen/Qwen3-32B", + "display_name": "Qwen/Qwen3-32B", "modalities": { "input": [ - "text", - "image" + "text" ], "output": [ "text" ] }, "limit": { - "context": 262000, - "output": 262000 + "context": 131072, + "output": 8192 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -57616,20 +58019,20 @@ ] } }, - "attachment": true, + "attachment": false, "open_weights": false, - "release_date": "2025-10-21", + "release_date": "2025-04-30", "last_updated": "2025-11-25", "cost": { - "input": 0.2, - "output": 1.5 + "input": 0.14, + "output": 0.57 }, "type": "chat" }, { - "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", - "name": 
"Qwen/Qwen3-VL-30B-A3B-Instruct", - "display_name": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "id": "Qwen/Qwen3-14B", + "name": "Qwen/Qwen3-14B", + "display_name": "Qwen/Qwen3-14B", "modalities": { "input": [ "text" @@ -57645,84 +58048,123 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true }, - "attachment": true, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, "open_weights": false, - "release_date": "2025-10-05", + "release_date": "2025-04-30", "last_updated": "2025-11-25", "cost": { - "input": 0.29, - "output": 1 + "input": 0.07, + "output": 0.28 }, "type": "chat" }, { - "id": "Qwen/Qwen3-Omni-30B-A3B-Instruct", - "name": "Qwen/Qwen3-Omni-30B-A3B-Instruct", - "display_name": "Qwen/Qwen3-Omni-30B-A3B-Instruct", + "id": "Qwen/Qwen3.5-35B-A3B", + "name": "Qwen/Qwen3.5-35B-A3B", + "display_name": "Qwen/Qwen3.5-35B-A3B", "modalities": { "input": [ - "text" + "text", + "image", + "video" ], "output": [ "text" ] }, "limit": { - "context": 131072, - "output": 8192 + "context": 262144, + "output": 65536 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, - "attachment": true, - "open_weights": false, - "release_date": "2025-10-04", - "last_updated": "2025-11-25", + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "knowledge": "2025-04", + "release_date": "2026-02-25", + "last_updated": "2026-02-25", "cost": { - "input": 0.1, - "output": 0.4 + "input": 0.23, + "output": 1.86 }, "type": "chat" }, { - "id": "Qwen/Qwen2.5-72B-Instruct", - "name": "Qwen/Qwen2.5-72B-Instruct", - "display_name": "Qwen/Qwen2.5-72B-Instruct", + 
"id": "Qwen/Qwen3-VL-32B-Thinking", + "name": "Qwen/Qwen3-VL-32B-Thinking", + "display_name": "Qwen/Qwen3-VL-32B-Thinking", "modalities": { "input": [ - "text" + "text", + "image" ], "output": [ "text" ] }, "limit": { - "context": 131072, - "output": 8192 + "context": 262000, + "output": 262000 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, - "attachment": false, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": true, "open_weights": false, - "release_date": "2024-09-18", + "release_date": "2025-10-21", "last_updated": "2025-11-25", "cost": { - "input": 0.59, - "output": 0.59 + "input": 0.2, + "output": 1.5 }, "type": "chat" }, { - "id": "Qwen/Qwen3-14B", - "name": "Qwen/Qwen3-14B", - "display_name": "Qwen/Qwen3-14B", + "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "name": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "display_name": "Qwen/Qwen3-VL-30B-A3B-Instruct", "modalities": { "input": [ "text" @@ -57738,33 +58180,22 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, - "attachment": false, + "attachment": true, "open_weights": false, - "release_date": "2025-04-30", + "release_date": "2025-10-05", "last_updated": "2025-11-25", "cost": { - "input": 0.07, - "output": 0.28 + "input": 0.29, + "output": 1 }, "type": "chat" }, { - "id": "Qwen/Qwen2.5-14B-Instruct", - "name": "Qwen/Qwen2.5-14B-Instruct", - "display_name": "Qwen/Qwen2.5-14B-Instruct", + "id": "Qwen/Qwen2.5-72B-Instruct", + "name": "Qwen/Qwen2.5-72B-Instruct", + "display_name": "Qwen/Qwen2.5-72B-Instruct", "modalities": { "input": [ "text" @@ -57787,8 
+58218,8 @@ "release_date": "2024-09-18", "last_updated": "2025-11-25", "cost": { - "input": 0.1, - "output": 0.1 + "input": 0.59, + "output": 0.59 }, "type": "chat" }, @@ -57824,194 +58255,6 @@ }, "type": "chat" }, - { - "id": "Qwen/Qwen3-32B", - "name": "Qwen/Qwen3-32B", - "display_name": "Qwen/Qwen3-32B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-04-30", - "last_updated": "2025-11-25", - "cost": { - "input": 0.14, - "output": 0.57 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "name": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "display_name": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-07-23", - "last_updated": "2025-11-25", - "cost": { - "input": 0.09, - "output": 0.6 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-8B", - "name": "Qwen/Qwen3-8B", - "display_name": "Qwen/Qwen3-8B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": 
false, - "release_date": "2025-04-30", - "last_updated": "2025-11-25", - "cost": { - "input": 0.06, - "output": 0.06 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-Coder-32B-Instruct", - "name": "Qwen/Qwen2.5-Coder-32B-Instruct", - "display_name": "Qwen/Qwen2.5-Coder-32B-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2024-11-11", - "last_updated": "2025-11-25", - "cost": { - "input": 0.18, - "output": 0.18 - } - }, - { - "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", - "name": "Qwen/Qwen3-Next-80B-A3B-Thinking", - "display_name": "Qwen/Qwen3-Next-80B-A3B-Thinking", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-09-25", - "last_updated": "2025-11-25", - "cost": { - "input": 0.14, - "output": 0.57 - }, - "type": "chat" - }, { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "name": "Qwen/Qwen3-VL-235B-A22B-Thinking", @@ -58055,79 +58298,6 @@ }, "type": "chat" }, - { - "id": "Qwen/Qwen3-Omni-30B-A3B-Captioner", - "name": "Qwen/Qwen3-Omni-30B-A3B-Captioner", - "display_name": "Qwen/Qwen3-Omni-30B-A3B-Captioner", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - 
"release_date": "2025-10-04", - "last_updated": "2025-11-25", - "cost": { - "input": 0.1, - "output": 0.4 - }, - "type": "chat" - }, - { - "id": "Qwen/QwQ-32B", - "name": "Qwen/QwQ-32B", - "display_name": "Qwen/QwQ-32B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-03-06", - "last_updated": "2025-11-25", - "cost": { - "input": 0.15, - "output": 0.58 - }, - "type": "chat" - }, { "id": "Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen/Qwen3-VL-8B-Instruct", @@ -58160,50 +58330,6 @@ }, "type": "chat" }, - { - "id": "Qwen/Qwen3-VL-8B-Thinking", - "name": "Qwen/Qwen3-VL-8B-Thinking", - "display_name": "Qwen/Qwen3-VL-8B-Thinking", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262000, - "output": 262000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-10-15", - "last_updated": "2025-11-25", - "cost": { - "input": 0.18, - "output": 2 - }, - "type": "chat" - }, { "id": "Qwen/Qwen3-VL-30B-A3B-Thinking", "name": "Qwen/Qwen3-VL-30B-A3B-Thinking", @@ -58338,142 +58464,6 @@ "output": 1 } }, - { - "id": "Qwen/Qwen2.5-72B-Instruct-128K", - "name": "Qwen/Qwen2.5-72B-Instruct-128K", - "display_name": "Qwen/Qwen2.5-72B-Instruct-128K", - "modalities": { - "input": [ - "text" - ], - 
"output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2024-09-18", - "last_updated": "2025-11-25", - "cost": { - "input": 0.59, - "output": 0.59 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", - "name": "Qwen/Qwen3-235B-A22B-Thinking-2507", - "display_name": "Qwen/Qwen3-235B-A22B-Thinking-2507", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-07-28", - "last_updated": "2025-11-25", - "cost": { - "input": 0.13, - "output": 0.6 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "name": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "display_name": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-09-18", - "last_updated": "2025-11-25", - "cost": { - "input": 0.14, - "output": 1.4 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-32B-Instruct", - "name": "Qwen/Qwen2.5-32B-Instruct", - "display_name": "Qwen/Qwen2.5-32B-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - 
"supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2024-09-19", - "last_updated": "2025-11-25", - "cost": { - "input": 0.18, - "output": 0.18 - }, - "type": "chat" - }, { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "name": "Qwen/Qwen3-VL-235B-A22B-Instruct", @@ -58536,154 +58526,6 @@ }, "type": "chat" }, - { - "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", - "name": "Qwen/Qwen3-30B-A3B-Thinking-2507", - "display_name": "Qwen/Qwen3-30B-A3B-Thinking-2507", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-07-31", - "last_updated": "2025-11-25", - "cost": { - "input": 0.09, - "output": 0.3 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-VL-32B-Instruct", - "name": "Qwen/Qwen2.5-VL-32B-Instruct", - "display_name": "Qwen/Qwen2.5-VL-32B-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-03-24", - "last_updated": "2025-11-25", - "cost": { - "input": 0.27, - "output": 0.27 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-VL-72B-Instruct", - "name": "Qwen/Qwen2.5-VL-72B-Instruct", - "display_name": "Qwen/Qwen2.5-VL-72B-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - 
"attachment": true, - "open_weights": false, - "release_date": "2025-01-28", - "last_updated": "2025-11-25", - "cost": { - "input": 0.59, - "output": 0.59 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-Omni-30B-A3B-Thinking", - "name": "Qwen/Qwen3-Omni-30B-A3B-Thinking", - "display_name": "Qwen/Qwen3-Omni-30B-A3B-Thinking", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-10-04", - "last_updated": "2025-11-25", - "cost": { - "input": 0.1, - "output": 0.4 - }, - "type": "chat" - }, { "id": "PaddlePaddle/PaddleOCR-VL-1.5", "name": "PaddlePaddle/PaddleOCR-VL-1.5", @@ -58760,8 +58602,8 @@ ] }, "limit": { - "context": 205000, - "output": 205000 + "context": 1049000, + "output": 262000 }, "temperature": true, "tool_call": true, @@ -58823,9 +58665,9 @@ "type": "chat" }, { - "id": "zai-org/GLM-4.5V", - "name": "zai-org/GLM-4.5V", - "display_name": "zai-org/GLM-4.5V", + "id": "deepseek-ai/DeepSeek-R1", + "name": "deepseek-ai/DeepSeek-R1", + "display_name": "deepseek-ai/DeepSeek-R1", "modalities": { "input": [ "text" @@ -58840,56 +58682,35 @@ }, "temperature": true, "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-08-13", - "last_updated": "2025-11-25", - "cost": { - "input": 0.14, - "output": 0.86 - }, - "type": "chat" - }, - { - "id": "zai-org/GLM-4.6V", - "name": "zai-org/GLM-4.6V", - "display_name": "zai-org/GLM-4.6V", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - 
"output": 131000 - }, - "temperature": true, - "tool_call": true, "reasoning": { "supported": true, "default": true }, - "attachment": true, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, "open_weights": false, - "release_date": "2025-12-07", - "last_updated": "2025-12-07", + "release_date": "2025-05-28", + "last_updated": "2025-11-25", "cost": { - "input": 0.3, - "output": 0.9 + "input": 0.5, + "output": 2.18 }, "type": "chat" }, { - "id": "zai-org/GLM-4.6", - "name": "zai-org/GLM-4.6", - "display_name": "zai-org/GLM-4.6", + "id": "deepseek-ai/DeepSeek-V3.1-Terminus", + "name": "deepseek-ai/DeepSeek-V3.1-Terminus", + "display_name": "deepseek-ai/DeepSeek-V3.1-Terminus", "modalities": { "input": [ "text" @@ -58905,20 +58726,16 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } + "supported": true, + "default": true }, "attachment": false, "open_weights": false, - "release_date": "2025-10-04", + "release_date": "2025-09-29", "last_updated": "2025-11-25", "cost": { - "input": 0.5, - "output": 1.9 + "input": 0.27, + "output": 1 }, "type": "chat" }, @@ -59011,6 +58828,43 @@ }, "type": "chat" }, + { + "id": "deepseek-ai/DeepSeek-V3.2", + "name": "deepseek-ai/DeepSeek-V3.2", + "display_name": "deepseek-ai/DeepSeek-V3.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 164000, + "output": 164000 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": false, + "open_weights": false, + "release_date": "2025-12-03", + "last_updated": "2025-12-03", + "cost": { + "input": 0.27, + "output": 0.42 + }, + "type": "chat" + }, { 
"id": "deepseek-ai/DeepSeek-OCR", "name": "deepseek-ai/DeepSeek-OCR", @@ -59042,9 +58896,9 @@ } }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "id": "deepseek-ai/DeepSeek-V3", + "name": "deepseek-ai/DeepSeek-V3", + "display_name": "deepseek-ai/DeepSeek-V3", "modalities": { "input": [ "text" @@ -59060,23 +58914,22 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": false, "open_weights": false, - "release_date": "2025-01-20", + "release_date": "2024-12-26", "last_updated": "2025-11-25", "cost": { - "input": 0.1, - "output": 0.1 + "input": 0.25, + "output": 1 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V3", - "name": "deepseek-ai/DeepSeek-V3", - "display_name": "deepseek-ai/DeepSeek-V3", + "id": "deepseek-ai/DeepSeek-V3.2-Exp", + "name": "deepseek-ai/DeepSeek-V3.2-Exp", + "display_name": "deepseek-ai/DeepSeek-V3.2-Exp", "modalities": { "input": [ "text" @@ -59089,25 +58942,38 @@ "context": 131072, "output": 8192 }, - "temperature": true, "tool_call": true, "reasoning": { "supported": false }, - "attachment": false, - "open_weights": false, - "release_date": "2024-12-26", - "last_updated": "2025-11-25", - "cost": { - "input": 0.25, - "output": 1 + "type": "chat" + }, + { + "id": "Pro/deepseek-ai/DeepSeek-V3.2-Exp", + "name": "Pro/deepseek-ai/DeepSeek-V3.2-Exp", + "display_name": "Pro/deepseek-ai/DeepSeek-V3.2-Exp", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V3.2", - "name": "deepseek-ai/DeepSeek-V3.2", - "display_name": "deepseek-ai/DeepSeek-V3.2", + "id": "inclusionAI/Ring-1T", + "name": "inclusionAI/Ring-1T", + "display_name": "inclusionAI/Ring-1T", 
"modalities": { "input": [ "text" @@ -59117,10 +58983,9 @@ ] }, "limit": { - "context": 164000, - "output": 164000 + "context": 131072, + "output": 8192 }, - "temperature": true, "tool_call": true, "reasoning": { "supported": true, @@ -59131,20 +58996,12 @@ "supported": true } }, - "attachment": false, - "open_weights": false, - "release_date": "2025-12-03", - "last_updated": "2025-12-03", - "cost": { - "input": 0.27, - "output": 0.42 - }, "type": "chat" }, { - "id": "deepseek-ai/deepseek-vl2", - "name": "deepseek-ai/deepseek-vl2", - "display_name": "deepseek-ai/deepseek-vl2", + "id": "inclusionAI/Ling-1T", + "name": "inclusionAI/Ling-1T", + "display_name": "inclusionAI/Ling-1T", "modalities": { "input": [ "text" @@ -59157,25 +59014,16 @@ "context": 131072, "output": 8192 }, - "temperature": true, "tool_call": true, "reasoning": { "supported": false }, - "attachment": true, - "open_weights": false, - "release_date": "2024-12-13", - "last_updated": "2025-11-25", - "cost": { - "input": 0.15, - "output": 0.15 - }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V3.1-Terminus", - "name": "deepseek-ai/DeepSeek-V3.1-Terminus", - "display_name": "deepseek-ai/DeepSeek-V3.1-Terminus", + "id": "zai-org/GLM-4.6", + "name": "zai-org/GLM-4.6", + "display_name": "zai-org/GLM-4.6", "modalities": { "input": [ "text" @@ -59188,26 +59036,21 @@ "context": 131072, "output": 8192 }, - "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": true }, - "attachment": false, - "open_weights": false, - "release_date": "2025-09-29", - "last_updated": "2025-11-25", - "cost": { - "input": 0.27, - "output": 1 + "extra_capabilities": { + "reasoning": { + "supported": true + } }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "id": "Qwen/Qwen3-Omni-30B-A3B-Instruct", + "name": 
"Qwen/Qwen3-Omni-30B-A3B-Instruct", + "display_name": "Qwen/Qwen3-Omni-30B-A3B-Instruct", "modalities": { "input": [ "text" @@ -59220,26 +59063,16 @@ "context": 131072, "output": 8192 }, - "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-01-20", - "last_updated": "2025-11-25", - "cost": { - "input": 0.18, - "output": 0.18 + "supported": false }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1", - "name": "deepseek-ai/DeepSeek-R1", - "display_name": "deepseek-ai/DeepSeek-R1", + "id": "Qwen/Qwen3-Omni-30B-A3B-Thinking", + "name": "Qwen/Qwen3-Omni-30B-A3B-Thinking", + "display_name": "Qwen/Qwen3-Omni-30B-A3B-Thinking", "modalities": { "input": [ "text" @@ -59252,7 +59085,6 @@ "context": 131072, "output": 8192 }, - "temperature": true, "tool_call": true, "reasoning": { "supported": true, @@ -59269,20 +59101,12 @@ ] } }, - "attachment": false, - "open_weights": false, - "release_date": "2025-05-28", - "last_updated": "2025-11-25", - "cost": { - "input": 0.5, - "output": 2.18 - }, "type": "chat" }, { - "id": "Kwaipilot/KAT-Dev", - "name": "Kwaipilot/KAT-Dev", - "display_name": "Kwaipilot/KAT-Dev", + "id": "Qwen/Qwen3-Omni-30B-A3B-Captioner", + "name": "Qwen/Qwen3-Omni-30B-A3B-Captioner", + "display_name": "Qwen/Qwen3-Omni-30B-A3B-Captioner", "modalities": { "input": [ "text" @@ -59292,28 +59116,19 @@ ] }, "limit": { - "context": 128000, - "output": 128000 + "context": 131072, + "output": 8192 }, - "temperature": true, "tool_call": true, "reasoning": { "supported": false }, - "attachment": false, - "open_weights": false, - "release_date": "2025-09-27", - "last_updated": "2026-01-16", - "cost": { - "input": 0.2, - "output": 0.6 - }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V3.2-Exp", - "name": "deepseek-ai/DeepSeek-V3.2-Exp", - "display_name": "deepseek-ai/DeepSeek-V3.2-Exp", + "id": "moonshotai/Kimi-K2-Instruct-0905", + "name": 
"moonshotai/Kimi-K2-Instruct-0905", + "display_name": "moonshotai/Kimi-K2-Instruct-0905", "modalities": { "input": [ "text" @@ -59333,9 +59148,9 @@ "type": "chat" }, { - "id": "Pro/deepseek-ai/DeepSeek-V3.2-Exp", - "name": "Pro/deepseek-ai/DeepSeek-V3.2-Exp", - "display_name": "Pro/deepseek-ai/DeepSeek-V3.2-Exp", + "id": "Pro/moonshotai/Kimi-K2-Instruct-0905", + "name": "Pro/moonshotai/Kimi-K2-Instruct-0905", + "display_name": "Pro/moonshotai/Kimi-K2-Instruct-0905", "modalities": { "input": [ "text" @@ -59355,9 +59170,9 @@ "type": "chat" }, { - "id": "inclusionAI/Ring-1T", - "name": "inclusionAI/Ring-1T", - "display_name": "inclusionAI/Ring-1T", + "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "name": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "display_name": "Qwen/Qwen3-Next-80B-A3B-Instruct", "modalities": { "input": [ "text" @@ -59372,20 +59187,14 @@ }, "tool_call": true, "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } + "supported": false }, "type": "chat" }, { - "id": "inclusionAI/Ling-1T", - "name": "inclusionAI/Ling-1T", - "display_name": "inclusionAI/Ling-1T", + "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "name": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "display_name": "Qwen/Qwen3-Next-80B-A3B-Thinking", "modalities": { "input": [ "text" @@ -59400,7 +59209,19 @@ }, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "type": "chat" }, @@ -59564,6 +59385,28 @@ "supported": false } }, + { + "id": "zai-org/GLM-4.5V", + "name": "zai-org/GLM-4.5V", + "display_name": "zai-org/GLM-4.5V", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": 
false + }, + "type": "chat" + }, { "id": "zai-org/GLM-4.5", "name": "zai-org/GLM-4.5", @@ -59625,6 +59468,62 @@ "supported": false } }, + { + "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "name": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "display_name": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "type": "chat" + }, + { + "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "name": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "display_name": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + }, + "type": "chat" + }, { "id": "THUDM/GLM-4.1V-9B-Thinking", "name": "THUDM/GLM-4.1V-9B-Thinking", @@ -60036,6 +59935,28 @@ }, "type": "chat" }, + { + "id": "Qwen/Qwen2.5-VL-32B-Instruct", + "name": "Qwen/Qwen2.5-VL-32B-Instruct", + "display_name": "Qwen/Qwen2.5-VL-32B-Instruct", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + }, + "type": "chat" + }, { "id": "Qwen/Qwen3-235B-A22B", "name": "Qwen/Qwen3-235B-A22B", @@ -60070,9 +59991,9 @@ "type": "chat" }, { - "id": "Pro/Qwen/Qwen2.5-VL-7B-Instruct", - "name": "Pro/Qwen/Qwen2.5-VL-7B-Instruct", - "display_name": "Pro/Qwen/Qwen2.5-VL-7B-Instruct", + "id": "Qwen/QwQ-32B", + "name": "Qwen/QwQ-32B", + "display_name": "Qwen/QwQ-32B", "modalities": { "input": [ "text" @@ -60087,14 +60008,25 @@ }, "tool_call": true, "reasoning": { - 
"supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "id": "Qwen/Qwen2.5-VL-72B-Instruct", + "name": "Qwen/Qwen2.5-VL-72B-Instruct", + "display_name": "Qwen/Qwen2.5-VL-72B-Instruct", "modalities": { "input": [ "text" @@ -60109,8 +60041,98 @@ }, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false + }, + "type": "chat" + }, + { + "id": "Pro/Qwen/Qwen2.5-VL-7B-Instruct", + "name": "Pro/Qwen/Qwen2.5-VL-7B-Instruct", + "display_name": "Pro/Qwen/Qwen2.5-VL-7B-Instruct", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + }, + "type": "chat" + }, + { + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "type": "chat" + }, + { + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "type": "chat" + }, + { + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "name": 
"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true }, "type": "chat" }, @@ -60314,6 +60336,27 @@ "supported": false } }, + { + "id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "name": "Qwen/Qwen2.5-Coder-32B-Instruct", + "display_name": "Qwen/Qwen2.5-Coder-32B-Instruct", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + } + }, { "id": "Kwai-Kolors/Kolors", "name": "Kwai-Kolors/Kolors", @@ -60353,6 +60396,94 @@ }, "type": "chat" }, + { + "id": "Qwen/Qwen2.5-72B-Instruct-128K", + "name": "Qwen/Qwen2.5-72B-Instruct-128K", + "display_name": "Qwen/Qwen2.5-72B-Instruct-128K", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + }, + "type": "chat" + }, + { + "id": "deepseek-ai/deepseek-vl2", + "name": "deepseek-ai/deepseek-vl2", + "display_name": "deepseek-ai/deepseek-vl2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + }, + "type": "chat" + }, + { + "id": "Qwen/Qwen2.5-32B-Instruct", + "name": "Qwen/Qwen2.5-32B-Instruct", + "display_name": "Qwen/Qwen2.5-32B-Instruct", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + }, + "type": "chat" + }, + { + "id": "Qwen/Qwen2.5-14B-Instruct", + "name": "Qwen/Qwen2.5-14B-Instruct", + "display_name": 
"Qwen/Qwen2.5-14B-Instruct", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 131072, + "output": 8192 + }, + "tool_call": true, + "reasoning": { + "supported": false + }, + "type": "chat" + }, { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "name": "Qwen/Qwen2.5-Coder-7B-Instruct", @@ -69622,7 +69753,12 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } }, "attachment": true, "open_weights": false, @@ -77632,9 +77768,9 @@ "release_date": "2025-12-22", "last_updated": "2025-12-22", "cost": { - "input": 0.6, - "output": 2.2, - "cache_read": 0.11, + "input": 0.38, + "output": 1.98, + "cache_read": 0.19, "cache_write": 0 }, "type": "chat" @@ -77709,8 +77845,8 @@ "release_date": "2025-07-08", "last_updated": "2025-07-08", "cost": { - "input": 0.2, - "output": 0.6 + "input": 0.09, + "output": 0.58 }, "type": "chat" }, @@ -77772,8 +77908,8 @@ "release_date": "2025-04", "last_updated": "2025-04", "cost": { - "input": 0.1, - "output": 0.3 + "input": 0.07, + "output": 0.27 }, "type": "chat" }, @@ -77837,8 +77973,8 @@ "release_date": "2025-07-11", "last_updated": "2025-07-11", "cost": { - "input": 1, - "output": 3, + "input": 0.574, + "output": 2.294, "cache_read": 0.5 }, "type": "chat" @@ -78135,8 +78271,8 @@ "release_date": "2025-09", "last_updated": "2025-09", "cost": { - "input": 0.5, - "output": 2 + "input": 0.15, + "output": 1.5 }, "type": "chat" }, @@ -78468,51 +78604,6 @@ }, "type": "chat" }, - { - "id": "kimi-k2-thinking-turbo", - "name": "Kimi K2 Thinking Turbo", - "display_name": "Kimi K2 Thinking Turbo", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262144, - "output": 262144 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": 
true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "knowledge": "2024-08", - "release_date": "2025-11-06", - "last_updated": "2025-11-06", - "cost": { - "input": 1.15, - "output": 8, - "cache_read": 0.15 - }, - "type": "chat" - }, { "id": "qwen3.6-35b-a3b", "name": "Qwen3.6 35B-A3B", @@ -78658,9 +78749,9 @@ "release_date": "2026-05-21", "last_updated": "2026-05-21", "cost": { - "input": 2.5, - "output": 7.5, - "cache_read": 0.5, + "input": 1.25, + "output": 3.75, + "cache_read": 0.125, "cache_write": 3.125 }, "type": "chat" @@ -78993,8 +79084,8 @@ "release_date": "2025-09-23", "last_updated": "2025-09-23", "cost": { - "input": 3, - "output": 15, + "input": 0.845, + "output": 3.38, "cache_read": 0.6, "cache_write": 3.75 }, @@ -79358,8 +79449,8 @@ "release_date": "2025-04-05", "last_updated": "2025-04-05", "cost": { - "input": 0.17, - "output": 0.66 + "input": 0.18, + "output": 0.59 }, "type": "chat" }, @@ -79635,9 +79726,9 @@ "release_date": "2026-04-07", "last_updated": "2026-04-07", "cost": { - "input": 1.4, - "output": 4.4, - "cache_read": 0.26, + "input": 0.931, + "output": 2.93, + "cache_read": 0.173, "cache_write": 0 }, "type": "chat" @@ -79726,8 +79817,8 @@ "release_date": "2025-09", "last_updated": "2025-09", "cost": { - "input": 0.5, - "output": 6 + "input": 0.15, + "output": 1.2 }, "type": "chat" }, @@ -79764,8 +79855,8 @@ "release_date": "2025-09-30", "last_updated": "2025-09-30", "cost": { - "input": 0.6, - "output": 2.2, + "input": 0.431, + "output": 2.007, "cache_read": 0.11, "cache_write": 0 }, @@ -79888,8 +79979,8 @@ "release_date": "2025-11-06", "last_updated": "2025-11-06", "cost": { - "input": 0.6, - "output": 2.5, + "input": 0.574, + "output": 2.294, "cache_read": 0.15 }, "type": "chat" @@ -80261,9 +80352,9 @@ "release_date": "2026-06-13", "last_updated": "2026-06-13", "cost": { - "input": 1.4, - "output": 4.4, - 
"cache_read": 0.26, + "input": 1.26, + "output": 3.96, + "cache_read": 0.234, "cache_write": 0 }, "type": "chat" @@ -81400,9 +81491,9 @@ "release_date": "2026-01", "last_updated": "2026-01", "cost": { - "input": 0.6, - "output": 3, - "cache_read": 0.1 + "input": 0.405, + "output": 1.98, + "cache_read": 0.225 }, "type": "chat" }, @@ -81573,8 +81664,8 @@ "release_date": "2025-08-05", "last_updated": "2025-08-05", "cost": { - "input": 0.15, - "output": 0.75 + "input": 0.05, + "output": 0.25 }, "type": "chat" }, @@ -82345,9 +82436,9 @@ "release_date": "2026-04-21", "last_updated": "2026-04-21", "cost": { - "input": 0.95, - "output": 4, - "cache_read": 0.16 + "input": 0.4, + "output": 2.2, + "cache_read": 0.08 }, "type": "chat" }, @@ -82582,8 +82673,8 @@ "release_date": "2025-04-05", "last_updated": "2025-04-05", "cost": { - "input": 0.24, - "output": 0.97 + "input": 0.27, + "output": 0.85 }, "type": "chat" }, @@ -82614,8 +82705,8 @@ "release_date": "2025-04", "last_updated": "2025-04", "cost": { - "input": 0.4, - "output": 1.8 + "input": 0.3, + "output": 1.3 }, "type": "chat" }, @@ -83424,9 +83515,9 @@ "release_date": "2025-07-28", "last_updated": "2025-07-28", "cost": { - "input": 0.2, - "output": 1.1, - "cache_read": 0.03, + "input": 0.13, + "output": 0.85, + "cache_read": 0.025, "cache_write": 0 }, "type": "chat" @@ -83667,8 +83758,8 @@ "release_date": "2025-09-15", "last_updated": "2025-09-15", "cost": { - "input": 0.5, - "output": 2 + "input": 0.3, + "output": 1.5 }, "type": "chat" }, @@ -83978,8 +84069,8 @@ "release_date": "2025-08-05", "last_updated": "2025-08-05", "cost": { - "input": 0.1, - "output": 0.5 + "input": 0.04, + "output": 0.15 }, "type": "chat" }, @@ -84056,9 +84147,9 @@ "release_date": "2026-02-12", "last_updated": "2026-02-12", "cost": { - "input": 1, - "output": 3.2, - "cache_read": 0.2, + "input": 0.72, + "output": 2.3, + "cache_read": 0.144, "cache_write": 0 }, "type": "chat" @@ -84294,9 +84385,9 @@ "release_date": "2025-09-29", "last_updated": 
"2025-09-29", "cost": { - "input": 0.28, - "output": 0.42, - "cache_read": 0.028 + "input": 0.26, + "output": 0.38, + "cache_read": 0.13 }, "type": "chat" }, @@ -84366,7 +84457,12 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } }, "attachment": true, "open_weights": false, @@ -90578,7 +90674,12 @@ "temperature": false, "tool_call": false, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } }, "attachment": false, "open_weights": false, @@ -91399,6 +91500,50 @@ }, "type": "chat" }, + { + "id": "zai-org/GLM-5.2", + "name": "GLM-5.2", + "display_name": "GLM-5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1048576, + "output": 32768 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-13", + "last_updated": "2026-06-13", + "cost": { + "input": 0.95, + "output": 3, + "cache_read": 0.18 + }, + "type": "chat" + }, { "id": "zai-org/GLM-5.1", "name": "GLM-5.1", @@ -96681,6 +96826,45 @@ }, "type": "chat" }, + { + "id": "minimaxai/minimax-m3", + "name": "MiniMax-M3", + "display_name": "MiniMax-M3", + "modalities": { + "input": [ + "text", + "image", + "video" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1000000, + "output": 16384 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": true, + "open_weights": true, + "release_date": "2026-06-01", + 
"last_updated": "2026-06-01", + "cost": { + "input": 0, + "output": 0 + }, + "type": "chat" + }, { "id": "minimaxai/minimax-m2.7", "name": "MiniMax-M2.7", @@ -99327,9 +99511,9 @@ "doc": "https://docs.evroc.com/products/think/overview.html", "models": [ { - "id": "moonshotai/Kimi-K2.5", - "name": "Kimi K2.5", - "display_name": "Kimi K2.5", + "id": "moonshotai/Kimi-K2.6", + "name": "Kimi K2.6", + "display_name": "Kimi K2.6", "modalities": { "input": [ "text", @@ -99344,6 +99528,7 @@ "context": 262144, "output": 262144 }, + "temperature": true, "tool_call": true, "reasoning": { "supported": true, @@ -99351,29 +99536,24 @@ }, "extra_capabilities": { "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] + "supported": true } }, - "attachment": false, + "attachment": true, "open_weights": true, - "release_date": "2026-01-27", - "last_updated": "2026-01-27", + "knowledge": "2025-01", + "release_date": "2026-04-21", + "last_updated": "2026-04-21", "cost": { - "input": 1.47, - "output": 5.9 + "input": 1.4375, + "output": 5.75 }, "type": "chat" }, { - "id": "microsoft/Phi-4-multimodal-instruct", - "name": "Phi-4 15B", - "display_name": "Phi-4 15B", + "id": "google/gemma-4-26B-A4B-it", + "name": "Gemma 4 26B A4B IT", + "display_name": "Gemma 4 26B A4B IT", "modalities": { "input": [ "text", @@ -99384,20 +99564,22 @@ ] }, "limit": { - "context": 32000, - "output": 32000 + "context": 262144, + "output": 32768 }, - "tool_call": false, + "temperature": true, + "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, - "attachment": false, + "attachment": true, "open_weights": true, - "release_date": "2025-01-01", - "last_updated": "2025-01-01", + "release_date": "2026-04-02", + "last_updated": "2026-04-02", "cost": { - "input": 0.24, - "output": 0.47 + "input": 0.144, + "output": 0.575 }, "type": "chat" }, @@ -99426,15 +99608,15 @@ 
"release_date": "2025-07-30", "last_updated": "2025-07-30", "cost": { - "input": 0.12, - "output": 0.12 + "input": 0.115, + "output": 0.115 }, "type": "embedding" }, { - "id": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", - "name": "Qwen3 30B 2507", - "display_name": "Qwen3 30B 2507", + "id": "Qwen/Qwen3-Reranker-4B", + "name": "Qwen3 Reranker 4B", + "display_name": "Qwen3 Reranker 4B", "modalities": { "input": [ "text" @@ -99444,10 +99626,10 @@ ] }, "limit": { - "context": 64000, - "output": 64000 + "context": 32000, + "output": 4096 }, - "tool_call": true, + "tool_call": false, "reasoning": { "supported": false }, @@ -99456,8 +99638,54 @@ "release_date": "2025-07-30", "last_updated": "2025-07-30", "cost": { - "input": 0.35, - "output": 1.42 + "input": 0.0575, + "output": 0 + }, + "type": "rerank" + }, + { + "id": "Qwen/Qwen3.6-35B-A3B-FP8", + "name": "Qwen3.6 35B-A3B", + "display_name": "Qwen3.6 35B-A3B", + "modalities": { + "input": [ + "text", + "image", + "video", + "audio" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 262144, + "output": 65536 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": true, + "open_weights": true, + "release_date": "2026-04-17", + "last_updated": "2026-04-17", + "cost": { + "input": 0.345, + "output": 1.38 }, "type": "chat" }, @@ -99488,8 +99716,8 @@ "release_date": "2025-07-30", "last_updated": "2025-07-30", "cost": { - "input": 0.24, - "output": 0.94 + "input": 0.23, + "output": 0.92 }, "type": "chat" }, @@ -99524,15 +99752,15 @@ "release_date": "2025-08-05", "last_updated": "2025-08-05", "cost": { - "input": 0.24, - "output": 0.94 + "input": 0.23, + "output": 0.92 }, "type": "chat" }, { - "id": "openai/whisper-large-v3", - "name": "Whisper 3 Large", - 
"display_name": "Whisper 3 Large", + "id": "openai/whisper-large-v3-turbo", + "name": "Whisper Large v3 Turbo", + "display_name": "Whisper Large v3 Turbo", "modalities": { "input": [ "audio" @@ -99543,7 +99771,7 @@ }, "limit": { "context": 448, - "output": 4096 + "output": 448 }, "tool_call": false, "reasoning": { @@ -99554,69 +99782,73 @@ "release_date": "2024-10-01", "last_updated": "2024-10-01", "cost": { - "input": 0.00236, - "output": 0.00236, - "output_audio": 2.36 + "input": 0.0023, + "output": 0.0023, + "output_audio": 2.3 }, "type": "chat" }, { - "id": "mistralai/devstral-small-2-24b-instruct-2512", - "name": "Devstral Small 2 24B Instruct 2512", - "display_name": "Devstral Small 2 24B Instruct 2512", + "id": "openai/whisper-large-v3", + "name": "Whisper 3 Large", + "display_name": "Whisper 3 Large", "modalities": { "input": [ - "text" + "audio" ], "output": [ "text" ] }, "limit": { - "context": 32768, - "output": 32768 + "context": 448, + "output": 4096 }, - "tool_call": true, + "tool_call": false, "reasoning": { "supported": false }, "attachment": false, "open_weights": true, - "release_date": "2025-12-01", - "last_updated": "2025-12-01", + "release_date": "2024-10-01", + "last_updated": "2024-10-01", "cost": { - "input": 0.12, - "output": 0.47 + "input": 0.0023, + "output": 0.0023, + "output_audio": 2.3 }, "type": "chat" }, { - "id": "mistralai/Magistral-Small-2509", - "name": "Magistral Small 1.2 24B", - "display_name": "Magistral Small 1.2 24B", + "id": "mistralai/Mistral-Medium-3.5-128B", + "name": "Mistral Medium 3.5", + "display_name": "Mistral Medium 3.5", "modalities": { "input": [ - "text" + "text", + "image" ], "output": [ "text" ] }, "limit": { - "context": 131072, - "output": 131072 + "context": 262144, + "output": 262144 }, - "tool_call": false, + "temperature": true, + "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, - "attachment": false, + "attachment": true, "open_weights": true, - 
"release_date": "2025-06-01", - "last_updated": "2025-06-01", + "release_date": "2026-04-29", + "last_updated": "2026-04-29", "cost": { - "input": 0.59, - "output": 2.36 + "input": 1.725, + "output": 6.9 }, "type": "chat" }, @@ -99646,16 +99878,16 @@ "release_date": "2025-03-01", "last_updated": "2025-03-01", "cost": { - "input": 0.00236, - "output": 0.00236, - "output_audio": 2.36 + "input": 0.0023, + "output": 0.0023, + "output_audio": 2.3 }, "type": "chat" }, { "id": "nvidia/Llama-3.3-70B-Instruct-FP8", - "name": "Llama 3.3 70B", - "display_name": "Llama 3.3 70B", + "name": "Llama-3.3-70B-Instruct", + "display_name": "Llama-3.3-70B-Instruct", "modalities": { "input": [ "text" @@ -99665,20 +99897,68 @@ ] }, "limit": { - "context": 131072, - "output": 32768 + "context": 128000, + "output": 4096 }, - "tool_call": false, + "temperature": true, + "tool_call": true, "reasoning": { "supported": false }, - "attachment": false, + "attachment": true, "open_weights": true, - "release_date": "2024-12-01", - "last_updated": "2024-12-01", + "knowledge": "2023-12", + "release_date": "2024-12-06", + "last_updated": "2024-12-06", "cost": { - "input": 1.18, - "output": 1.18 + "input": 1.15, + "output": 1.15 + }, + "type": "chat" + }, + { + "id": "evroc/roc", + "name": "roc", + "display_name": "roc", + "modalities": { + "input": [ + "text", + "image", + "video" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 262144, + "output": 262144 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": true, + "open_weights": false, + "knowledge": "2026-01", + "release_date": "2026-06-06", + "last_updated": "2026-06-06", + "cost": { + "input": 2.875, + "output": 11.516 }, "type": "chat" }, @@ -99707,9 +99987,9 @@ 
"release_date": "2024-10-01", "last_updated": "2024-10-01", "cost": { - "input": 0.00236, - "output": 0.00236, - "output_audio": 2.36 + "input": 0.0023, + "output": 0.0023, + "output_audio": 2.3 }, "type": "chat" }, @@ -99738,8 +100018,8 @@ "release_date": "2024-06-01", "last_updated": "2024-06-01", "cost": { - "input": 0.12, - "output": 0.12 + "input": 0.114, + "output": 0.114 }, "type": "chat" } @@ -100283,42 +100563,6 @@ }, "type": "chat" }, - { - "id": "claude-3-5-haiku-20241022", - "name": "Claude Haiku 3.5", - "display_name": "Claude Haiku 3.5", - "modalities": { - "input": [ - "text", - "image", - "pdf" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 200000, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "knowledge": "2024-07-31", - "release_date": "2024-10-22", - "last_updated": "2024-10-22", - "cost": { - "input": 0.8, - "output": 4, - "cache_read": 0.08, - "cache_write": 1 - }, - "type": "chat" - }, { "id": "claude-opus-4-0", "name": "Claude Opus 4 (latest)", @@ -101458,42 +101702,6 @@ "cache_write": 3.75 }, "type": "chat" - }, - { - "id": "claude-3-5-haiku-latest", - "name": "Claude Haiku 3.5 (latest)", - "display_name": "Claude Haiku 3.5 (latest)", - "modalities": { - "input": [ - "text", - "image", - "pdf" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 200000, - "output": 8192 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "knowledge": "2024-07-31", - "release_date": "2024-10-22", - "last_updated": "2024-10-22", - "cost": { - "input": 0.8, - "output": 4, - "cache_read": 0.08, - "cache_write": 1 - }, - "type": "chat" } ] }, @@ -107536,7 +107744,7 @@ "release_date": "2026-04-21", "last_updated": "2026-04-21", "cost": { - "input": 0.78, + "input": 0.66, "output": 3.5, "cache_read": 0.2, "cache_write": 0 @@ -107544,35 
+107752,95 @@ "type": "chat" }, { - "id": "nvidia/llama-3.3-70b-instruct-fp8", - "name": "Llama 3.3 70B Instruct", - "display_name": "Llama 3.3 70B Instruct", + "id": "moonshotai/Kimi-K2.7-Code", + "name": "Kimi K2.7 Code", + "display_name": "Kimi K2.7 Code", "modalities": { "input": [ + "text", + "image" + ], + "output": [ "text" + ] + }, + "limit": { + "context": 262144, + "output": 262144 + }, + "temperature": false, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": true, + "open_weights": true, + "knowledge": "2025-01", + "release_date": "2026-06-12", + "last_updated": "2026-06-12", + "cost": { + "input": 0.75, + "output": 3.5, + "cache_read": 0.2, + "cache_write": 0 + }, + "type": "chat" + }, + { + "id": "moonshotai/Kimi-K2.6-Fast", + "name": "Kimi K2.6 Fast", + "display_name": "Kimi K2.6 Fast", + "modalities": { + "input": [ + "text", + "image" ], "output": [ "text" ] }, "limit": { - "context": 131072, - "output": 131072 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "attachment": true, "open_weights": true, - "knowledge": "2023-12", - "release_date": "2024-12-06", - "last_updated": "2024-12-06", + "knowledge": "2025-01", + "release_date": "2026-04-21", + "last_updated": "2026-04-21", "cost": { - "input": 0.12, - "output": 0.38, - "cache_read": 0, + "input": 1.32, + "output": 7, + "cache_read": 0.4, "cache_write": 0 }, "type": "chat" @@ -107622,6 +107890,51 @@ }, "type": "chat" }, + { + "id": "zai-org/GLM-5.2", + "name": "GLM 5.2", 
+ "display_name": "GLM 5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1000000, + "output": 131072 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-13", + "last_updated": "2026-06-13", + "cost": { + "input": 1.2, + "output": 4.2, + "cache_read": 0.26, + "cache_write": 0 + }, + "type": "chat" + }, { "id": "MiniMaxAI/MiniMax-M2.5", "name": "MiniMax M2.5", @@ -107654,9 +107967,9 @@ "release_date": "2026-02-12", "last_updated": "2026-02-12", "cost": { - "input": 0.24, + "input": 0.15, "output": 0.9, - "cache_read": 0.03, + "cache_read": 0.05, "cache_write": 0 }, "type": "chat" @@ -108680,6 +108993,41 @@ }, "type": "chat" }, + { + "id": "step-3.7-flash", + "name": "Step 3.7 Flash", + "display_name": "Step 3.7 Flash", + "modalities": { + "input": [ + "text", + "image" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 256000, + "output": 256000 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": true, + "open_weights": true, + "knowledge": "2026-01-01", + "release_date": "2026-05-29", + "last_updated": "2026-05-29", + "cost": { + "input": 0.19, + "output": 1.13, + "cache_read": 0.04 + }, + "type": "chat" + }, { "id": "step-3.5-flash-2603", "name": "Step 3.5 Flash 2603", @@ -109141,7 +109489,7 @@ ] }, "limit": { - "context": 512000, + "context": 1000000, "output": 128000 }, "temperature": true, @@ -109158,11 +109506,27 @@ "attachment": true, "open_weights": true, "release_date": "2026-06-01", - "last_updated": "2026-06-01", + "last_updated": "2026-06-25", "cost": { - "input": 0.6, - "output": 2.4, - 
"cache_read": 0.12 + "input": 0.3, + "output": 1.2, + "cache_read": 0.06, + "tiers": [ + { + "input": 0.6, + "output": 2.4, + "cache_read": 0.12, + "tier": { + "type": "context", + "size": 512000 + } + } + ], + "context_over_200k": { + "input": 0.6, + "output": 2.4, + "cache_read": 0.12 + } }, "type": "chat" }, @@ -116701,111 +117065,6 @@ "api": "https://api.siliconflow.com/v1", "doc": "https://cloud.siliconflow.com/models", "models": [ - { - "id": "moonshotai/Kimi-K2-Thinking", - "name": "moonshotai/Kimi-K2-Thinking", - "display_name": "moonshotai/Kimi-K2-Thinking", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262000, - "output": 262000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-11-07", - "last_updated": "2025-11-25", - "cost": { - "input": 0.55, - "output": 2.5 - }, - "type": "chat" - }, - { - "id": "moonshotai/Kimi-K2-Instruct-0905", - "name": "moonshotai/Kimi-K2-Instruct-0905", - "display_name": "moonshotai/Kimi-K2-Instruct-0905", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262000, - "output": 262000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-09-08", - "last_updated": "2025-11-25", - "cost": { - "input": 0.4, - "output": 2 - }, - "type": "chat" - }, - { - "id": "moonshotai/Kimi-K2-Instruct", - "name": "moonshotai/Kimi-K2-Instruct", - "display_name": "moonshotai/Kimi-K2-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 131000 
- }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-07-13", - "last_updated": "2025-11-25", - "cost": { - "input": 0.58, - "output": 2.29 - }, - "type": "chat" - }, { "id": "moonshotai/Kimi-K2.6", "name": "moonshotai/Kimi-K2.6", @@ -117088,118 +117347,9 @@ "type": "chat" }, { - "id": "Qwen/Qwen3-Omni-30B-A3B-Thinking", - "name": "Qwen/Qwen3-Omni-30B-A3B-Thinking", - "display_name": "Qwen/Qwen3-Omni-30B-A3B-Thinking", - "modalities": { - "input": [ - "text", - "image", - "audio" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 66000, - "output": 66000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-10-04", - "last_updated": "2025-11-25", - "cost": { - "input": 0.1, - "output": 0.4 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-VL-72B-Instruct", - "name": "Qwen/Qwen2.5-VL-72B-Instruct", - "display_name": "Qwen/Qwen2.5-VL-72B-Instruct", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 4000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-01-28", - "last_updated": "2025-11-25", - "cost": { - "input": 0.59, - "output": 0.59 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-VL-32B-Instruct", - "name": "Qwen/Qwen2.5-VL-32B-Instruct", - "display_name": "Qwen/Qwen2.5-VL-32B-Instruct", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 131000 - }, - 
"temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-03-24", - "last_updated": "2025-11-25", - "cost": { - "input": 0.27, - "output": 0.27 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", - "name": "Qwen/Qwen3-30B-A3B-Thinking-2507", - "display_name": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "id": "Qwen/Qwen3.6-35B-A3B", + "name": "Qwen3.6 35B-A3B", + "display_name": "Qwen3.6 35B-A3B", "modalities": { "input": [ "text" @@ -117209,14 +117359,13 @@ ] }, "limit": { - "context": 262000, - "output": 131000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -117231,11 +117380,11 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-07-31", - "last_updated": "2025-11-25", + "release_date": "2026-04-17", + "last_updated": "2026-04-17", "cost": { - "input": 0.09, - "output": 0.3 + "input": 0.2, + "output": 1.6 }, "type": "chat" }, @@ -117303,9 +117452,9 @@ "type": "chat" }, { - "id": "Qwen/Qwen2.5-32B-Instruct", - "name": "Qwen/Qwen2.5-32B-Instruct", - "display_name": "Qwen/Qwen2.5-32B-Instruct", + "id": "Qwen/Qwen3.6-27B", + "name": "Qwen3.6 27B", + "display_name": "Qwen3.6 27B", "modalities": { "input": [ "text" @@ -117315,28 +117464,39 @@ ] }, "limit": { - "context": 33000, - "output": 4000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "attachment": false, "open_weights": false, - "release_date": "2024-09-19", - "last_updated": "2025-11-25", + "release_date": "2026-04-22", + "last_updated": 
"2026-04-22", "cost": { - "input": 0.18, - "output": 0.18 + "input": 0.3, + "output": 3.2 }, "type": "chat" }, { - "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "name": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "display_name": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "id": "Qwen/Qwen3.5-397B-A17B", + "name": "Qwen3.5 397B-A17B", + "display_name": "Qwen3.5 397B-A17B", "modalities": { "input": [ "text" @@ -117346,21 +117506,32 @@ ] }, "limit": { - "context": 262000, - "output": 262000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "attachment": false, "open_weights": false, - "release_date": "2025-09-18", - "last_updated": "2025-11-25", + "release_date": "2026-02-15", + "last_updated": "2026-02-15", "cost": { - "input": 0.14, - "output": 1.4 + "input": 0.39, + "output": 2.34 }, "type": "chat" }, @@ -117407,37 +117578,6 @@ }, "type": "chat" }, - { - "id": "Qwen/Qwen2.5-72B-Instruct-128K", - "name": "Qwen/Qwen2.5-72B-Instruct-128K", - "display_name": "Qwen/Qwen2.5-72B-Instruct-128K", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 4000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2024-09-18", - "last_updated": "2025-11-25", - "cost": { - "input": 0.59, - "output": 0.59 - }, - "type": "chat" - }, { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "name": "Qwen/Qwen3-Coder-480B-A35B-Instruct", @@ -117470,9 +117610,9 @@ "type": "chat" }, { - "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", - "name": "Qwen/Qwen3-Coder-30B-A3B-Instruct", - "display_name": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "id": "Qwen/Qwen3.5-122B-A10B", + 
"name": "Qwen3.5 122B-A10B", + "display_name": "Qwen3.5 122B-A10B", "modalities": { "input": [ "text" @@ -117482,28 +117622,39 @@ ] }, "limit": { - "context": 262000, - "output": 262000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "attachment": false, "open_weights": false, - "release_date": "2025-08-01", - "last_updated": "2025-11-25", + "release_date": "2026-02-23", + "last_updated": "2026-02-23", "cost": { - "input": 0.07, - "output": 0.28 + "input": 0.26, + "output": 2.08 }, "type": "chat" }, { - "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", - "name": "Qwen/Qwen3-30B-A3B-Instruct-2507", - "display_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "name": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "display_name": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "modalities": { "input": [ "text" @@ -117523,36 +117674,34 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-07-30", + "release_date": "2025-08-01", "last_updated": "2025-11-25", "cost": { - "input": 0.09, - "output": 0.3 + "input": 0.07, + "output": 0.28 }, "type": "chat" }, { - "id": "Qwen/Qwen3-VL-30B-A3B-Thinking", - "name": "Qwen/Qwen3-VL-30B-A3B-Thinking", - "display_name": "Qwen/Qwen3-VL-30B-A3B-Thinking", + "id": "Qwen/Qwen3.5-27B", + "name": "Qwen3.5 27B", + "display_name": "Qwen3.5 27B", "modalities": { "input": [ - "text", - "image" + "text" ], "output": [ "text" ] }, "limit": { - "context": 262000, - "output": 262000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -117565,24 +117714,23 @@ ] } }, - "attachment": true, + 
"attachment": false, "open_weights": false, - "release_date": "2025-10-11", - "last_updated": "2025-11-25", + "release_date": "2026-02-23", + "last_updated": "2026-02-23", "cost": { - "input": 0.29, - "output": 1 + "input": 0.25, + "output": 2 }, "type": "chat" }, { - "id": "Qwen/Qwen3-VL-8B-Thinking", - "name": "Qwen/Qwen3-VL-8B-Thinking", - "display_name": "Qwen/Qwen3-VL-8B-Thinking", + "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "display_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", "modalities": { "input": [ - "text", - "image" + "text" ], "output": [ "text" @@ -117595,34 +117743,22 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, - "attachment": true, + "attachment": false, "open_weights": false, - "release_date": "2025-10-15", + "release_date": "2025-07-30", "last_updated": "2025-11-25", "cost": { - "input": 0.18, - "output": 2 + "input": 0.09, + "output": 0.3 }, "type": "chat" }, { - "id": "Qwen/Qwen3-VL-8B-Instruct", - "name": "Qwen/Qwen3-VL-8B-Instruct", - "display_name": "Qwen/Qwen3-VL-8B-Instruct", + "id": "Qwen/Qwen3-VL-30B-A3B-Thinking", + "name": "Qwen/Qwen3-VL-30B-A3B-Thinking", + "display_name": "Qwen/Qwen3-VL-30B-A3B-Thinking", "modalities": { "input": [ "text", @@ -117638,37 +117774,6 @@ }, "temperature": true, "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-10-15", - "last_updated": "2025-11-25", - "cost": { - "input": 0.18, - "output": 0.68 - }, - "type": "chat" - }, - { - "id": "Qwen/QwQ-32B", - "name": "Qwen/QwQ-32B", - "display_name": "Qwen/QwQ-32B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - 
"output": 131000 - }, - "temperature": true, - "tool_call": true, "reasoning": { "supported": true, "default": true @@ -117684,31 +117789,32 @@ ] } }, - "attachment": false, + "attachment": true, "open_weights": false, - "release_date": "2025-03-06", + "release_date": "2025-10-11", "last_updated": "2025-11-25", "cost": { - "input": 0.15, - "output": 0.58 + "input": 0.29, + "output": 1 }, "type": "chat" }, { - "id": "Qwen/Qwen3-Omni-30B-A3B-Captioner", - "name": "Qwen/Qwen3-Omni-30B-A3B-Captioner", - "display_name": "Qwen/Qwen3-Omni-30B-A3B-Captioner", + "id": "Qwen/Qwen3-VL-8B-Instruct", + "name": "Qwen/Qwen3-VL-8B-Instruct", + "display_name": "Qwen/Qwen3-VL-8B-Instruct", "modalities": { "input": [ - "audio" + "text", + "image" ], "output": [ "text" ] }, "limit": { - "context": 66000, - "output": 66000 + "context": 262000, + "output": 262000 }, "temperature": true, "tool_call": true, @@ -117717,11 +117823,11 @@ }, "attachment": true, "open_weights": false, - "release_date": "2025-10-04", + "release_date": "2025-10-15", "last_updated": "2025-11-25", "cost": { - "input": 0.1, - "output": 0.4 + "input": 0.18, + "output": 0.68 }, "type": "chat" }, @@ -117770,9 +117876,9 @@ "type": "chat" }, { - "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", - "name": "Qwen/Qwen3-Next-80B-A3B-Thinking", - "display_name": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "id": "Qwen/Qwen3-8B", + "name": "Qwen/Qwen3-8B", + "display_name": "Qwen/Qwen3-8B", "modalities": { "input": [ "text" @@ -117782,8 +117888,8 @@ ] }, "limit": { - "context": 262000, - "output": 262000 + "context": 131000, + "output": 131000 }, "temperature": true, "tool_call": true, @@ -117804,81 +117910,18 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-09-25", - "last_updated": "2025-11-25", - "cost": { - "input": 0.14, - "output": 0.57 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-Coder-32B-Instruct", - "name": "Qwen/Qwen2.5-Coder-32B-Instruct", - "display_name": "Qwen/Qwen2.5-Coder-32B-Instruct", - 
"modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 33000, - "output": 4000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2024-11-11", - "last_updated": "2025-11-25", - "cost": { - "input": 0.18, - "output": 0.18 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-VL-7B-Instruct", - "name": "Qwen/Qwen2.5-VL-7B-Instruct", - "display_name": "Qwen/Qwen2.5-VL-7B-Instruct", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 33000, - "output": 4000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-01-28", + "release_date": "2025-04-30", "last_updated": "2025-11-25", "cost": { - "input": 0.05, - "output": 0.05 + "input": 0.06, + "output": 0.06 }, "type": "chat" }, { - "id": "Qwen/Qwen3-8B", - "name": "Qwen/Qwen3-8B", - "display_name": "Qwen/Qwen3-8B", + "id": "Qwen/Qwen3.5-9B", + "name": "Qwen/Qwen3.5-9B", + "display_name": "Qwen/Qwen3.5-9B", "modalities": { "input": [ "text" @@ -117888,8 +117931,8 @@ ] }, "limit": { - "context": 131000, - "output": 131000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, @@ -117909,42 +117952,11 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-04-30", - "last_updated": "2025-11-25", - "cost": { - "input": 0.06, - "output": 0.06 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "name": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "display_name": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262000, - "output": 262000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - 
"open_weights": false, - "release_date": "2025-07-23", - "last_updated": "2025-11-25", + "release_date": "2026-03-03", + "last_updated": "2026-04-24", "cost": { - "input": 0.09, - "output": 0.6 + "input": 0.1, + "output": 0.15 }, "type": "chat" }, @@ -117967,7 +117979,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": { @@ -118022,37 +118035,6 @@ }, "type": "chat" }, - { - "id": "Qwen/Qwen2.5-14B-Instruct", - "name": "Qwen/Qwen2.5-14B-Instruct", - "display_name": "Qwen/Qwen2.5-14B-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 33000, - "output": 4000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2024-09-18", - "last_updated": "2025-11-25", - "cost": { - "input": 0.1, - "output": 0.1 - }, - "type": "chat" - }, { "id": "Qwen/Qwen3-14B", "name": "Qwen/Qwen3-14B", @@ -118072,7 +118054,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": { @@ -118095,48 +118078,6 @@ }, "type": "chat" }, - { - "id": "Qwen/Qwen3-235B-A22B", - "name": "Qwen/Qwen3-235B-A22B", - "display_name": "Qwen/Qwen3-235B-A22B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 131000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-04-30", - "last_updated": "2025-11-25", - "cost": { - "input": 0.35, - "output": 1.42 - }, - "type": "chat" - }, { 
"id": "Qwen/Qwen2.5-72B-Instruct", "name": "Qwen/Qwen2.5-72B-Instruct", @@ -118169,22 +118110,21 @@ "type": "chat" }, { - "id": "Qwen/Qwen3-Omni-30B-A3B-Instruct", - "name": "Qwen/Qwen3-Omni-30B-A3B-Instruct", - "display_name": "Qwen/Qwen3-Omni-30B-A3B-Instruct", + "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "name": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "display_name": "Qwen/Qwen3-VL-30B-A3B-Instruct", "modalities": { "input": [ "text", - "image", - "audio" + "image" ], "output": [ "text" ] }, "limit": { - "context": 66000, - "output": 66000 + "context": 262000, + "output": 262000 }, "temperature": true, "tool_call": true, @@ -118193,18 +118133,18 @@ }, "attachment": true, "open_weights": false, - "release_date": "2025-10-04", + "release_date": "2025-10-05", "last_updated": "2025-11-25", "cost": { - "input": 0.1, - "output": 0.4 + "input": 0.29, + "output": 1 }, "type": "chat" }, { - "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", - "name": "Qwen/Qwen3-VL-30B-A3B-Instruct", - "display_name": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "id": "Qwen/Qwen3-VL-32B-Thinking", + "name": "Qwen/Qwen3-VL-32B-Thinking", + "display_name": "Qwen/Qwen3-VL-32B-Thinking", "modalities": { "input": [ "text", @@ -118221,40 +118161,50 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "attachment": true, "open_weights": false, - "release_date": "2025-10-05", + "release_date": "2025-10-21", "last_updated": "2025-11-25", "cost": { - "input": 0.29, - "output": 1 + "input": 0.2, + "output": 1.5 }, "type": "chat" }, { - "id": "Qwen/Qwen3-VL-32B-Thinking", - "name": "Qwen/Qwen3-VL-32B-Thinking", - "display_name": "Qwen/Qwen3-VL-32B-Thinking", + "id": "Qwen/Qwen3.5-35B-A3B", + "name": "Qwen3.5 35B-A3B", + "display_name": "Qwen3.5 35B-A3B", "modalities": { 
"input": [ - "text", - "image" + "text" ], "output": [ "text" ] }, "limit": { - "context": 262000, - "output": 262000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -118267,13 +118217,13 @@ ] } }, - "attachment": true, + "attachment": false, "open_weights": false, - "release_date": "2025-10-21", - "last_updated": "2025-11-25", + "release_date": "2026-02-23", + "last_updated": "2026-02-23", "cost": { - "input": 0.2, - "output": 1.5 + "input": 0.24, + "output": 1.8 }, "type": "chat" }, @@ -118465,75 +118415,6 @@ }, "type": "chat" }, - { - "id": "zai-org/GLM-4.6", - "name": "zai-org/GLM-4.6", - "display_name": "zai-org/GLM-4.6", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 205000, - "output": 205000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-10-04", - "last_updated": "2025-11-25", - "cost": { - "input": 0.5, - "output": 1.9 - }, - "type": "chat" - }, - { - "id": "zai-org/GLM-4.6V", - "name": "zai-org/GLM-4.6V", - "display_name": "zai-org/GLM-4.6V", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 131000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-12-07", - "last_updated": "2025-12-07", - "cost": { - "input": 0.3, - "output": 0.9 - }, - "type": "chat" - }, { "id": "zai-org/GLM-5", "name": "zai-org/GLM-5", @@ -118577,81 +118458,6 @@ }, "type": "chat" }, - { - "id": "zai-org/GLM-4.7", - "name": "zai-org/GLM-4.7", - "display_name": "zai-org/GLM-4.7", - 
"modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 205000, - "output": 205000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-12-22", - "last_updated": "2025-12-22", - "cost": { - "input": 0.6, - "output": 2.2 - }, - "type": "chat" - }, - { - "id": "zai-org/GLM-4.5V", - "name": "zai-org/GLM-4.5V", - "display_name": "zai-org/GLM-4.5V", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 66000, - "output": 66000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2025-08-13", - "last_updated": "2025-11-25", - "cost": { - "input": 0.14, - "output": 0.86 - }, - "type": "chat" - }, { "id": "zai-org/GLM-5.2", "name": "GLM-5.2", @@ -118665,8 +118471,8 @@ ] }, "limit": { - "context": 205000, - "output": 205000 + "context": 1049000, + "output": 262000 }, "temperature": true, "tool_call": true, @@ -118772,40 +118578,9 @@ "type": "chat" }, { - "id": "zai-org/GLM-4.5", - "name": "zai-org/GLM-4.5", - "display_name": "zai-org/GLM-4.5", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 131000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-07-28", - "last_updated": "2025-11-25", - "cost": { - "input": 0.4, - "output": 2 - }, - "type": "chat" - }, - { - "id": "deepseek-ai/deepseek-v4-flash", - "name": "DeepSeek V4 Flash", - "display_name": "DeepSeek V4 Flash", + 
"id": "deepseek-ai/DeepSeek-R1", + "name": "deepseek-ai/DeepSeek-R1", + "display_name": "deepseek-ai/DeepSeek-R1", "modalities": { "input": [ "text" @@ -118815,8 +118590,8 @@ ] }, "limit": { - "context": 1000000, - "output": 384000 + "context": 164000, + "output": 164000 }, "temperature": true, "tool_call": true, @@ -118836,21 +118611,19 @@ } }, "attachment": false, - "open_weights": true, - "knowledge": "2025-05", - "release_date": "2026-04-24", - "last_updated": "2026-04-24", + "open_weights": false, + "release_date": "2025-05-28", + "last_updated": "2025-11-25", "cost": { - "input": 0.14, - "output": 0.28, - "cache_read": 0.028 + "input": 0.5, + "output": 2.18 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1", - "name": "deepseek-ai/DeepSeek-R1", - "display_name": "deepseek-ai/DeepSeek-R1", + "id": "deepseek-ai/DeepSeek-V3.1-Terminus", + "name": "deepseek-ai/DeepSeek-V3.1-Terminus", + "display_name": "deepseek-ai/DeepSeek-V3.1-Terminus", "modalities": { "input": [ "text" @@ -118869,31 +118642,20 @@ "supported": true, "default": true }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, "attachment": false, "open_weights": false, - "release_date": "2025-05-28", + "release_date": "2025-09-29", "last_updated": "2025-11-25", "cost": { - "input": 0.5, - "output": 2.18 + "input": 0.27, + "output": 1 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "id": "deepseek-ai/DeepSeek-V3.1", + "name": "deepseek-ai/DeepSeek-V3.1", + "display_name": "deepseek-ai/DeepSeek-V3.1", "modalities": { "input": [ "text" @@ -118903,8 +118665,8 @@ ] }, "limit": { - "context": 131000, - "output": 131000 + "context": 164000, + "output": 164000 }, "temperature": true, "tool_call": true, @@ -118914,18 +118676,18 
@@ }, "attachment": false, "open_weights": false, - "release_date": "2025-01-20", + "release_date": "2025-08-25", "last_updated": "2025-11-25", "cost": { - "input": 0.18, - "output": 0.18 + "input": 0.27, + "output": 1 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V3.1-Terminus", - "name": "deepseek-ai/DeepSeek-V3.1-Terminus", - "display_name": "deepseek-ai/DeepSeek-V3.1-Terminus", + "id": "deepseek-ai/DeepSeek-V3.2-Exp", + "name": "deepseek-ai/DeepSeek-V3.2-Exp", + "display_name": "deepseek-ai/DeepSeek-V3.2-Exp", "modalities": { "input": [ "text" @@ -118946,18 +118708,18 @@ }, "attachment": false, "open_weights": false, - "release_date": "2025-09-29", + "release_date": "2025-10-10", "last_updated": "2025-11-25", "cost": { "input": 0.27, - "output": 1 + "output": 0.41 }, "type": "chat" }, { - "id": "deepseek-ai/deepseek-v4-pro", - "name": "DeepSeek V4 Pro", - "display_name": "DeepSeek V4 Pro", + "id": "deepseek-ai/DeepSeek-V4-Flash", + "name": "DeepSeek V4 Flash", + "display_name": "DeepSeek V4 Flash", "modalities": { "input": [ "text" @@ -118993,16 +118755,16 @@ "release_date": "2026-04-24", "last_updated": "2026-04-24", "cost": { - "input": 1.74, - "output": 3.48, - "cache_read": 0.145 + "input": 0.14, + "output": 0.28, + "cache_read": 0.028 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V3.1", - "name": "deepseek-ai/DeepSeek-V3.1", - "display_name": "deepseek-ai/DeepSeek-V3.1", + "id": "deepseek-ai/DeepSeek-V4-Pro", + "name": "DeepSeek V4 Pro", + "display_name": "DeepSeek V4 Pro", "modalities": { "input": [ "text" @@ -119012,8 +118774,8 @@ ] }, "limit": { - "context": 164000, - "output": 164000 + "context": 1000000, + "output": 384000 }, "temperature": true, "tool_call": true, @@ -119021,77 +118783,26 @@ "supported": true, "default": true }, - "attachment": false, - "open_weights": false, - "release_date": "2025-08-25", - "last_updated": "2025-11-25", - "cost": { - "input": 0.27, - "output": 1 - }, - "type": "chat" - }, - { - "id": 
"deepseek-ai/DeepSeek-V3.2-Exp", - "name": "deepseek-ai/DeepSeek-V3.2-Exp", - "display_name": "deepseek-ai/DeepSeek-V3.2-Exp", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 164000, - "output": 164000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "attachment": false, - "open_weights": false, - "release_date": "2025-10-10", - "last_updated": "2025-11-25", - "cost": { - "input": 0.27, - "output": 0.41 - }, - "type": "chat" - }, - { - "id": "deepseek-ai/deepseek-vl2", - "name": "deepseek-ai/deepseek-vl2", - "display_name": "deepseek-ai/deepseek-vl2", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 4000, - "output": 4000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": false, - "release_date": "2024-12-13", - "last_updated": "2025-11-25", + "open_weights": true, + "knowledge": "2025-05", + "release_date": "2026-04-24", + "last_updated": "2026-04-24", "cost": { - "input": 0.15, - "output": 0.15 + "input": 1.74, + "output": 3.48, + "cache_read": 0.145 }, "type": "chat" }, @@ -119163,38 +118874,6 @@ }, "type": "chat" }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131000, - "output": 131000 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "attachment": false, - "open_weights": false, - "release_date": "2025-01-20", - "last_updated": 
"2025-11-25", - "cost": { - "input": 0.1, - "output": 0.1 - }, - "type": "chat" - }, { "id": "MiniMaxAI/MiniMax-M2.5", "name": "MiniMaxAI/MiniMax-M2.5", @@ -120215,6 +119894,90 @@ }, "type": "chat" }, + { + "id": "moonshotai/kimi-k2.7-code-highspeed", + "name": "Kimi K2.7 Code Highspeed", + "display_name": "Kimi K2.7 Code Highspeed", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 262144, + "output": 262144 + }, + "temperature": false, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": true, + "open_weights": true, + "knowledge": "2025-01", + "release_date": "2026-06-12", + "last_updated": "2026-06-12", + "cost": { + "input": 1.9, + "output": 8, + "cache_read": 0.38 + }, + "type": "chat" + }, + { + "id": "Qwen/Qwen3.7-Max", + "name": "Qwen3.7 Max", + "display_name": "Qwen3.7 Max", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1000000, + "output": 65536 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": false, + "open_weights": false, + "release_date": "2026-05-21", + "last_updated": "2026-05-21", + "cost": { + "input": 2.5, + "output": 7.5, + "cache_read": 0.25, + "cache_write": 3.125 + }, + "type": "chat" + }, { "id": "anthropic/claude-opus-4.7", "name": "Claude Opus 4.7", @@ -120487,6 +120250,50 @@ }, "type": "chat" }, + { + "id": "zai-org/GLM-5.2-FP8", + "name": "GLM-5.2", + "display_name": "GLM-5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1000000, + "output": 131072 + }, + "temperature": true, + "tool_call": true, 
+ "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-13", + "last_updated": "2026-06-13", + "cost": { + "input": 0.979, + "output": 3.08, + "cache_read": 0.182 + }, + "type": "chat" + }, { "id": "deepseek-ai/DeepSeek-V4-Flash", "name": "DeepSeek V4 Flash", @@ -121162,6 +120969,43 @@ }, "type": "chat" }, + { + "id": "glm-5.2", + "name": "GLM-5.2", + "display_name": "GLM-5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 256000, + "output": 16384 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-13", + "last_updated": "2026-06-13", + "cost": { + "input": 1.8, + "output": 5.5 + }, + "type": "chat" + }, { "id": "pixtral-12b-2409", "name": "Pixtral 12B 2409", @@ -121247,7 +121091,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": { @@ -121516,37 +121361,6 @@ "api": "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1", "doc": "https://www.ovhcloud.com/en/public-cloud/ai-endpoints/catalog//", "models": [ - { - "id": "llama-3.1-8b-instruct", - "name": "Llama-3.1-8B-Instruct", - "display_name": "Llama-3.1-8B-Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 131072 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-06-11", - "last_updated": "2025-06-11", 
- "cost": { - "input": 0.11, - "output": 0.11 - }, - "type": "chat" - }, { "id": "qwen3-coder-30b-a3b-instruct", "name": "Qwen3-Coder-30B-A3B-Instruct", @@ -122046,64 +121860,35 @@ "doc": "https://friendli.ai/docs/guides/serverless_endpoints/introduction", "models": [ { - "id": "meta-llama/Llama-3.3-70B-Instruct", - "name": "Llama 3.3 70B Instruct", - "display_name": "Llama 3.3 70B Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 131072 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2024-08-01", - "last_updated": "2025-12-23", - "cost": { - "input": 0.6, - "output": 0.6 - }, - "type": "chat" - }, - { - "id": "meta-llama/Llama-3.1-8B-Instruct", - "name": "Llama 3.1 8B Instruct", - "display_name": "Llama 3.1 8B Instruct", + "id": "google/gemma-4-31B-it", + "name": "Gemma 4 31B IT", + "display_name": "Gemma 4 31B IT", "modalities": { "input": [ - "text" + "text", + "image" ], "output": [ "text" ] }, "limit": { - "context": 131072, - "output": 8000 + "context": 262144, + "output": 32768 }, "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, - "attachment": false, + "attachment": true, "open_weights": true, - "release_date": "2024-08-01", - "last_updated": "2025-12-23", + "release_date": "2026-04-02", + "last_updated": "2026-04-02", "cost": { - "input": 0.1, - "output": 0.1 + "input": 0.14, + "output": 0.4 }, "type": "chat" }, @@ -122126,10 +121911,12 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, "attachment": false, "open_weights": true, + "knowledge": "2025-04", "release_date": "2025-07-29", "last_updated": "2026-01-29", "cost": { @@ -122270,6 +122057,50 @@ }, "type": "chat" }, + { + "id": "deepseek-ai/DeepSeek-V3.2", + "name": 
"DeepSeek-V3.2", + "display_name": "DeepSeek-V3.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 163840, + "output": 163840 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2025-12-01", + "last_updated": "2025-12-01", + "cost": { + "input": 0.5, + "output": 1.5, + "cache_read": 0.25 + }, + "type": "chat" + }, { "id": "MiniMaxAI/MiniMax-M2.5", "name": "MiniMax-M2.5", @@ -122391,8 +122222,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": false, "open_weights": true, @@ -122424,8 +122254,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": false, "open_weights": true, @@ -122488,8 +122317,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": false, "open_weights": true, @@ -122565,8 +122393,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": false, "open_weights": true, @@ -122737,7 +122564,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": { @@ -122773,7 +122601,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": { @@ -122809,7 +122638,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, 
"attachment": false, "open_weights": true, @@ -122978,7 +122808,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": { @@ -124738,8 +124569,7 @@ "temperature": true, "tool_call": false, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": true, "open_weights": false, @@ -132897,6 +132727,11 @@ "supported": true, "default": true }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, "attachment": true, "open_weights": false, "release_date": "2026-03-15", @@ -134426,8 +134261,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": false, "open_weights": true, @@ -135658,8 +135492,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": false, "open_weights": true, @@ -140189,6 +140022,40 @@ }, "type": "chat" }, + { + "id": "xai.grok-4.3", + "name": "Grok 4.3", + "display_name": "Grok 4.3", + "modalities": { + "input": [ + "text", + "image" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 1000000, + "output": 131072 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "attachment": true, + "open_weights": false, + "release_date": "2026-04-17", + "last_updated": "2026-06-28", + "cost": { + "input": 1.25, + "output": 2.5, + "cache_read": 0.2 + }, + "type": "chat" + }, { "id": "mistral.voxtral-mini-3b-2507", "name": "Voxtral Mini 3B 2507", @@ -140729,10 +140596,10 @@ "release_date": "2025-10-15", "last_updated": "2025-10-15", "cost": { - "input": 1, - "output": 5, - "cache_read": 0.1, - "cache_write": 1.25 + "input": 1.1, + "output": 5.5, + "cache_read": 0.11, + "cache_write": 1.375 }, "type": "chat" }, @@ -141585,10 +141452,10 @@ "release_date": "2025-11-24", "last_updated": 
"2025-08-01", "cost": { - "input": 5, - "output": 25, - "cache_read": 0.5, - "cache_write": 6.25 + "input": 5.5, + "output": 27.5, + "cache_read": 0.55, + "cache_write": 6.875 }, "type": "chat" }, @@ -142943,6 +142810,50 @@ }, "type": "chat" }, + { + "id": "zai-org/GLM-5.2", + "name": "GLM-5.2", + "display_name": "GLM-5.2", + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "limit": { + "context": 262144, + "output": 164000 + }, + "temperature": true, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": false, + "open_weights": true, + "release_date": "2026-06-16", + "last_updated": "2026-06-16", + "cost": { + "input": 1.4, + "output": 4.4, + "cache_read": 0.26 + }, + "type": "chat" + }, { "id": "zai-org/GLM-5.1", "name": "GLM-5.1", @@ -149738,7 +149649,8 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true + "supported": true, + "default": true }, "extra_capabilities": { "reasoning": { @@ -157140,8 +157052,8 @@ ] }, "limit": { - "context": 131000, - "output": 131000 + "context": 262144, + "output": 262144 }, "temperature": true, "tool_call": true, @@ -184160,9 +184072,9 @@ "release_date": "2026-04-21", "last_updated": "2026-04-21", "cost": { - "input": 0.95, - "output": 4, - "cache_read": 0.475 + "input": 0.66, + "output": 3.5, + "cache_read": 0.33 }, "type": "chat" }, @@ -184241,9 +184153,9 @@ "release_date": "2026-04-02", "last_updated": "2026-04-02", "cost": { - "input": 0.13, - "output": 0.38, - "cache_read": 0.065 + "input": 0.12, + "output": 0.37, + "cache_read": 0.06 }, "type": "chat" }, @@ -184275,16 +184187,16 @@ "release_date": "2025-04", "last_updated": "2025-04", "cost": { - "input": 0.08, - "output": 0.24, - "cache_read": 0.04 + "input": 0.104, + "output": 0.416, + 
"cache_read": 0.052 }, "type": "chat" }, { - "id": "Qwen/Qwen2.5-VL-32B-Instruct", - "name": "Qwen2.5 VL 32B Instruct", - "display_name": "Qwen2.5 VL 32B Instruct", + "id": "Qwen/Qwen3.6-27B-TEE", + "name": "Qwen3.6 27B TEE", + "display_name": "Qwen3.6 27B TEE", "modalities": { "input": [ "text", @@ -184294,70 +184206,6 @@ "text" ] }, - "limit": { - "context": 16384, - "output": 16384 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.0543, - "output": 0.2174, - "cache_read": 0.02715 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "name": "Qwen3 Next 80B A3B Instruct", - "display_name": "Qwen3 Next 80B A3B Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262144, - "output": 262144 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.1, - "output": 0.8, - "cache_read": 0.05 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-235B-A22B-Instruct-2507-TEE", - "name": "Qwen3 235B A22B Instruct 2507 TEE", - "display_name": "Qwen3 235B A22B Instruct 2507 TEE", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, "limit": { "context": 262144, "output": 65536 @@ -184365,24 +184213,35 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true }, - "attachment": false, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } + }, + "attachment": true, "open_weights": true, - "knowledge": "2025-04", - "release_date": "2025-04", - 
"last_updated": "2025-04", + "release_date": "2026-04-22", + "last_updated": "2026-04-22", "cost": { - "input": 0.1, - "output": 0.6, - "cache_read": 0.05 + "input": 0.3, + "output": 2, + "cache_read": 0.15 }, "type": "chat" }, { - "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", - "name": "Qwen3 235B A22B Thinking 2507", - "display_name": "Qwen3 235B A22B Thinking 2507", + "id": "Qwen/Qwen3-235B-A22B-Thinking-2507-TEE", + "name": "Qwen3 235B A22B Thinking 2507 TEE", + "display_name": "Qwen3 235B A22B Thinking 2507 TEE", "modalities": { "input": [ "text" @@ -184414,192 +184273,20 @@ }, "attachment": false, "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", + "knowledge": "2025-04", + "release_date": "2025-07", + "last_updated": "2026-06-21", "cost": { - "input": 0.11, - "output": 0.6, - "cache_read": 0.055 + "input": 0.2989, + "output": 1.1957, + "cache_read": 0.14945 }, "type": "chat" }, { - "id": "Qwen/Qwen3.6-27B-TEE", - "name": "Qwen3.6 27B TEE", - "display_name": "Qwen3.6 27B TEE", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262144, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": true, - "open_weights": true, - "release_date": "2026-04-22", - "last_updated": "2026-04-22", - "cost": { - "input": 0.195, - "output": 1.56, - "cache_read": 0.0975 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3Guard-Gen-0.6B", - "name": "Qwen3Guard Gen 0.6B", - "display_name": "Qwen3Guard Gen 0.6B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 32768, - "output": 8192 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - 
"supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.01, - "output": 0.0109, - "cache_read": 0.005 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-Coder-Next-TEE", - "name": "Qwen3 Coder Next TEE", - "display_name": "Qwen3 Coder Next TEE", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262144, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2026-04-25", - "last_updated": "2026-04-25", - "cost": { - "input": 0.12, - "output": 0.75, - "cache_read": 0.06 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-Coder-32B-Instruct", - "name": "Qwen2.5 Coder 32B Instruct", - "display_name": "Qwen2.5 Coder 32B Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 32768, - "output": 32768 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.0272, - "output": 0.1087, - "cache_read": 0.0136 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen2.5-72B-Instruct", - "name": "Qwen2.5 72B Instruct", - "display_name": "Qwen2.5 72B Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 32768, - "output": 32768 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.2989, - "output": 1.1957, - "cache_read": 0.14945 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3.5-397B-A17B-TEE", - "name": "Qwen3.5 397B A17B TEE", - "display_name": "Qwen3.5 
397B A17B TEE", + "id": "Qwen/Qwen3.5-397B-A17B-TEE", + "name": "Qwen3.5 397B A17B TEE", + "display_name": "Qwen3.5 397B A17B TEE", "modalities": { "input": [ "text", @@ -184635,60 +184322,16 @@ "release_date": "2026-02-15", "last_updated": "2026-02-15", "cost": { - "input": 0.39, - "output": 2.34, - "cache_read": 0.195 - }, - "type": "chat" - }, - { - "id": "Qwen/Qwen3-30B-A3B", - "name": "Qwen3 30B A3B", - "display_name": "Qwen3 30B A3B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 40960, - "output": 40960 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.06, - "output": 0.22, - "cache_read": 0.03 + "input": 0.45, + "output": 3, + "cache_read": 0.225 }, "type": "chat" }, { - "id": "openai/gpt-oss-120b-TEE", - "name": "gpt oss 120b TEE", - "display_name": "gpt oss 120b TEE", + "id": "unsloth/Mistral-Nemo-Instruct-2407-TEE", + "name": "Mistral Nemo Instruct 2407 TEE", + "display_name": "Mistral Nemo Instruct 2407 TEE", "modalities": { "input": [ "text" @@ -184699,138 +184342,18 @@ }, "limit": { "context": 131072, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.09, - "output": 0.36, - "cache_read": 0.045 - }, - "type": "chat" - }, - { 
- "id": "XiaomiMiMo/MiMo-V2-Flash-TEE", - "name": "MiMo V2 Flash TEE", - "display_name": "MiMo V2 Flash TEE", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 262144, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "knowledge": "2024-12-01", - "release_date": "2025-12-16", - "last_updated": "2026-02-04", - "cost": { - "input": 0.09, - "output": 0.29, - "cache_read": 0.045 - }, - "type": "chat" - }, - { - "id": "NousResearch/Hermes-4-14B", - "name": "Hermes 4 14B", - "display_name": "Hermes 4 14B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 40960, - "output": 40960 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.0136, - "output": 0.0543, - "cache_read": 0.0068 - }, - "type": "chat" - }, - { - "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", - "name": "DeepHermes 3 Mistral 24B Preview", - "display_name": "DeepHermes 3 Mistral 24B Preview", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 32768, - "output": 32768 + "output": 131072 }, "temperature": true, - "tool_call": true, + "tool_call": false, "reasoning": { "supported": false }, "attachment": false, "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", + "knowledge": "2024-07", + "release_date": "2024-07-01", + "last_updated": "2024-07-01", "cost": { "input": 0.0245, "output": 0.0978, @@ -184838,279 +184361,6 @@ }, "type": 
"chat" }, - { - "id": "unsloth/Llama-3.2-1B-Instruct", - "name": "Llama 3.2 1B Instruct", - "display_name": "Llama 3.2 1B Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 16384, - "output": 8192 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2026-01-27", - "last_updated": "2026-04-25", - "cost": { - "input": 0.01, - "output": 0.0109, - "cache_read": 0.005 - }, - "type": "chat" - }, - { - "id": "unsloth/gemma-3-12b-it", - "name": "gemma 3 12b it", - "display_name": "gemma 3 12b it", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 131072 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.03, - "output": 0.1, - "cache_read": 0.015 - }, - "type": "chat" - }, - { - "id": "unsloth/gemma-3-4b-it", - "name": "gemma 3 4b it", - "display_name": "gemma 3 4b it", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 96000, - "output": 96000 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.01, - "output": 0.0272, - "cache_read": 0.005 - }, - "type": "chat" - }, - { - "id": "unsloth/gemma-3-27b-it", - "name": "gemma 3 27b it", - "display_name": "gemma 3 27b it", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 128000, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": true, - 
"open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.0272, - "output": 0.1087, - "cache_read": 0.0136 - }, - "type": "chat" - }, - { - "id": "unsloth/Mistral-Nemo-Instruct-2407", - "name": "Mistral Nemo Instruct 2407", - "display_name": "Mistral Nemo Instruct 2407", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 131072 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.02, - "output": 0.04, - "cache_read": 0.01 - }, - "type": "chat" - }, - { - "id": "unsloth/Llama-3.2-3B-Instruct", - "name": "Llama 3.2 3B Instruct", - "display_name": "Llama 3.2 3B Instruct", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 16384, - "output": 16384 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-02-12", - "last_updated": "2026-04-25", - "cost": { - "input": 0.01, - "output": 0.0136, - "cache_read": 0.005 - }, - "type": "chat" - }, - { - "id": "tngtech/DeepSeek-TNG-R1T2-Chimera-TEE", - "name": "DeepSeek TNG R1T2 Chimera TEE", - "display_name": "DeepSeek TNG R1T2 Chimera TEE", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 163840, - "output": 163840 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "attachment": false, - "open_weights": true, - "release_date": "2026-04-25", - "last_updated": "2026-04-25", - "cost": { - "input": 0.3, - "output": 1.1, - "cache_read": 0.15 - }, - "type": "chat" - }, - { - "id": "zai-org/GLM-4.7-TEE", - "name": "GLM 4.7 TEE", - "display_name": "GLM 4.7 TEE", - 
"modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 202752, - "output": 65535 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "knowledge": "2025-04", - "release_date": "2025-12-22", - "last_updated": "2025-12-22", - "cost": { - "input": 0.39, - "output": 1.75, - "cache_read": 0.195 - }, - "type": "chat" - }, { "id": "zai-org/GLM-5-TEE", "name": "GLM 5 TEE", @@ -185193,61 +184443,16 @@ "release_date": "2026-04-07", "last_updated": "2026-04-07", "cost": { - "input": 1.05, - "output": 3.5, - "cache_read": 0.525 - }, - "type": "chat" - }, - { - "id": "zai-org/GLM-4.6V", - "name": "GLM 4.6V", - "display_name": "GLM 4.6V", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": true, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.3, - "output": 0.9, - "cache_read": 0.15 + "input": 0.98, + "output": 3.08, + "cache_read": 0.49 }, "type": "chat" }, { - "id": "zai-org/GLM-5-Turbo", - "name": "GLM 5 Turbo", - "display_name": "GLM 5 Turbo", + "id": "zai-org/GLM-5.2-TEE", + "name": "GLM 5.2 TEE", + "display_name": "GLM 5.2 TEE", "modalities": { "input": [ "text" @@ -185257,7 +184462,7 @@ ] }, "limit": { - "context": 202752, + "context": 1048576, "output": 65535 }, "temperature": 
true, @@ -185279,45 +184484,12 @@ }, "attachment": false, "open_weights": true, - "release_date": "2026-03-11", - "last_updated": "2026-04-25", - "cost": { - "input": 0.4891, - "output": 1.9565, - "cache_read": 0.24455 - }, - "type": "chat" - }, - { - "id": "zai-org/GLM-4.7-FP8", - "name": "GLM 4.7 FP8", - "display_name": "GLM 4.7 FP8", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 202752, - "output": 65535 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "attachment": false, - "open_weights": true, - "release_date": "2026-01-27", - "last_updated": "2026-04-25", + "release_date": "2026-06-13", + "last_updated": "2026-06-13", "cost": { - "input": 0.2989, - "output": 1.1957, - "cache_read": 0.14945 + "input": 1.4, + "output": 4.4, + "cache_read": 0.7 }, "type": "chat" }, @@ -185356,177 +184528,12 @@ }, "attachment": false, "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.28, - "output": 0.42, - "cache_read": 0.14 - }, - "type": "chat" - }, - { - "id": "deepseek-ai/DeepSeek-V3-0324-TEE", - "name": "DeepSeek V3 0324 TEE", - "display_name": "DeepSeek V3 0324 TEE", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 163840, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": false - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.25, - "output": 1, - "cache_read": 0.125 - }, - "type": "chat" - }, - { - "id": "deepseek-ai/DeepSeek-V3.1-TEE", - "name": "DeepSeek V3.1 TEE", - "display_name": "DeepSeek V3.1 TEE", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 163840, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - 
"supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", + "release_date": "2025-12", + "last_updated": "2026-06-21", "cost": { - "input": 0.27, + "input": 1, "output": 1, - "cache_read": 0.135 - }, - "type": "chat" - }, - { - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "name": "DeepSeek R1 Distill Llama 70B", - "display_name": "DeepSeek R1 Distill Llama 70B", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 131072 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.0272, - "output": 0.1087, - "cache_read": 0.0136 - }, - "type": "chat" - }, - { - "id": "deepseek-ai/DeepSeek-R1-0528-TEE", - "name": "DeepSeek R1 0528 TEE", - "display_name": "DeepSeek R1 0528 TEE", - "modalities": { - "input": [ - "text" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 163840, - "output": 65536 - }, - "temperature": true, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "attachment": false, - "open_weights": true, - "knowledge": "2024-07", - "release_date": "2025-01-20", - "last_updated": "2025-05-29", - "cost": { - 
"input": 0.45, - "output": 2.15, - "cache_read": 0.225 + "cache_read": 0.5 }, "type": "chat" }, @@ -185573,39 +184580,6 @@ "cache_read": 0.075 }, "type": "chat" - }, - { - "id": "rednote-hilab/dots.ocr", - "name": "dots.ocr", - "display_name": "dots.ocr", - "modalities": { - "input": [ - "text", - "image" - ], - "output": [ - "text" - ] - }, - "limit": { - "context": 131072, - "output": 131072 - }, - "temperature": true, - "tool_call": false, - "reasoning": { - "supported": false - }, - "attachment": true, - "open_weights": true, - "release_date": "2025-12-29", - "last_updated": "2026-04-25", - "cost": { - "input": 0.01, - "output": 0.0109, - "cache_read": 0.005 - }, - "type": "chat" } ] }, @@ -185828,7 +184802,7 @@ ] }, "limit": { - "context": 512000, + "context": 1000000, "output": 128000 }, "temperature": true, @@ -185845,7 +184819,7 @@ "attachment": true, "open_weights": true, "release_date": "2026-06-01", - "last_updated": "2026-06-01", + "last_updated": "2026-06-25", "cost": { "input": 0, "output": 0, @@ -186297,7 +185271,7 @@ ] }, "limit": { - "context": 512000, + "context": 1000000, "output": 128000 }, "temperature": true, @@ -186314,11 +185288,27 @@ "attachment": true, "open_weights": true, "release_date": "2026-06-01", - "last_updated": "2026-06-01", + "last_updated": "2026-06-25", "cost": { - "input": 0.6, - "output": 2.4, - "cache_read": 0.12 + "input": 0.3, + "output": 1.2, + "cache_read": 0.06, + "tiers": [ + { + "input": 0.6, + "output": 2.4, + "cache_read": 0.12, + "tier": { + "type": "context", + "size": 512000 + } + } + ], + "context_over_200k": { + "input": 0.6, + "output": 2.4, + "cache_read": 0.12 + } }, "type": "chat" }, @@ -188650,7 +187640,13 @@ }, "extra_capabilities": { "reasoning": { - "supported": true + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] } }, "attachment": true, @@ -188748,8 +187744,8 @@ }, { "id": 
"hf:nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4", - "name": "Nemotron 3 Super 120B", - "display_name": "Nemotron 3 Super 120B", + "name": "Nemotron 3 Super 120B A12B", + "display_name": "Nemotron 3 Super 120B A12B", "modalities": { "input": [ "text" @@ -188781,9 +187777,8 @@ }, "attachment": false, "open_weights": true, - "knowledge": "2024-04", "release_date": "2026-03-11", - "last_updated": "2026-04-03", + "last_updated": "2026-03-11", "cost": { "input": 0.3, "output": 1, @@ -206824,7 +205819,7 @@ "default": true }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -206849,7 +205844,7 @@ "default": true }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -206874,7 +205869,7 @@ "default": true }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -206898,7 +205893,7 @@ "supported": false }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -206922,7 +205917,7 @@ "supported": false }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -206946,7 +205941,7 @@ "supported": false }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -206971,7 +205966,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -206996,7 +205991,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "embedding" }, { @@ -207021,7 +206016,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": 
"2026-06-27T07:10:57Z", "type": "embedding" }, { @@ -207047,7 +206042,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207073,7 +206068,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207099,7 +206094,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207125,7 +206120,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207151,7 +206146,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207177,7 +206172,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207203,7 +206198,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207229,7 +206224,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207255,7 +206250,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207281,7 +206276,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207307,7 +206302,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207333,7 +206328,7 @@ "default": true }, "attachment": true, - "last_updated": 
"2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207359,7 +206354,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207385,7 +206380,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207409,7 +206404,7 @@ "supported": false }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207435,7 +206430,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207461,7 +206456,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207487,7 +206482,7 @@ "default": true }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207511,7 +206506,7 @@ "supported": false }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" }, { @@ -207532,7 +206527,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z" + "last_updated": "2026-06-27T07:10:57Z" }, { "id": "doubao-seedance-1-0-pro-fast-251015", @@ -207552,7 +206547,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z" + "last_updated": "2026-06-27T07:10:57Z" }, { "id": "doubao-seedance-1-5-pro-251215", @@ -207573,7 +206568,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z" + "last_updated": "2026-06-27T07:10:57Z" }, { "id": "doubao-seedance-2-0-260128", @@ -207595,7 +206590,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z" + "last_updated": 
"2026-06-27T07:10:57Z" }, { "id": "doubao-seedance-2-0-fast-260128", @@ -207617,7 +206612,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z" + "last_updated": "2026-06-27T07:10:57Z" }, { "id": "doubao-seedance-2-0-mini-260615", @@ -207639,7 +206634,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z" + "last_updated": "2026-06-27T07:10:57Z" }, { "id": "doubao-seedream-4-0-250828", @@ -207655,7 +206650,7 @@ "supported": false }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "imageGeneration" }, { @@ -207672,7 +206667,7 @@ "supported": false }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "imageGeneration" }, { @@ -207693,7 +206688,7 @@ "supported": false }, "attachment": true, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "imageGeneration" }, { @@ -207718,7 +206713,7 @@ "default": true }, "attachment": false, - "last_updated": "2026-06-25T05:23:56Z", + "last_updated": "2026-06-27T07:10:57Z", "type": "chat" } ] @@ -208444,7 +207439,7 @@ "display_name": "Kimi K2 Thinking", "limit": { "context": 262144, - "output": 262144 + "output": 100352 }, "tool_call": true, "reasoning": { @@ -208740,7 +207735,7 @@ "display_name": "Kimi K2 0905", "limit": { "context": 262144, - "output": 262144 + "output": 100352 }, "tool_call": true, "reasoning": { @@ -208783,7 +207778,7 @@ "display_name": "Kimi K2 Instruct", "limit": { "context": 131072, - "output": 32768 + "output": 100352 }, "tool_call": true, "reasoning": { @@ -214678,12 +213673,15 @@ "display_name": "AIHubMix", "models": [ { - "id": "glm-5.2", - "name": "glm-5.2", - "display_name": "glm-5.2", + "id": "auto", + "name": "auto", + "display_name": "auto", "modalities": { "input": [ - "text" + "text", + "audio", + "image", + "video" ] }, "limit": { @@ -214701,20 +213699,18 @@ } }, 
"cost": { - "input": 1.1268, - "output": 3.9438, - "cache_read": 0.2817 + "input": 2, + "output": 2 }, "type": "chat" }, { - "id": "claude-fable-5", - "name": "claude-fable-5", - "display_name": "claude-fable-5", + "id": "glm-5.2", + "name": "glm-5.2", + "display_name": "glm-5.2", "modalities": { "input": [ - "text", - "image" + "text" ] }, "limit": { @@ -214728,34 +213724,13 @@ }, "extra_capabilities": { "reasoning": { - "supported": true, - "default_enabled": true, - "mode": "effort", - "effort": "high", - "effort_options": [ - "low", - "medium", - "high", - "xhigh", - "max" - ], - "interleaved": true, - "summaries": true, - "visibility": "omitted", - "continuation": [ - "thinking_blocks" - ], - "notes": [ - "Adaptive thinking is always on for Claude Fable 5 and Claude Mythos 5; thinking.type = \"disabled\" is rejected.", - "Manual budget_tokens requests return 400 on Claude Fable 5 and Claude Mythos 5.", - "thinking.display defaults to omitted; set display to summarized to receive readable thinking summaries." 
- ] + "supported": true } }, "cost": { - "input": 11, - "output": 55, - "cache_read": 1.1 + "input": 1.1268, + "output": 3.9438, + "cache_read": 0.2817 }, "type": "chat" }, @@ -216514,6 +215489,30 @@ }, "type": "chat" }, + { + "id": "doubao-seedance-2-0-mini-260615", + "name": "doubao-seedance-2-0-mini-260615", + "display_name": "doubao-seedance-2-0-mini-260615", + "modalities": { + "input": [ + "image", + "text" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 2, + "output": 0 + }, + "type": "chat" + }, { "id": "glm-5.1", "name": "glm-5.1", @@ -216569,13 +215568,13 @@ "type": "imageGeneration" }, { - "id": "wan2.7-videoedit", - "name": "wan2.7-videoedit", - "display_name": "wan2.7-videoedit", + "id": "wan2.7-i2v", + "name": "wan2.7-i2v", + "display_name": "wan2.7-i2v", "modalities": { "input": [ - "text", - "video" + "image", + "text" ] }, "limit": { @@ -216593,12 +215592,13 @@ "type": "chat" }, { - "id": "wan2.7-t2v", - "name": "wan2.7-t2v", - "display_name": "wan2.7-t2v", + "id": "wan2.7-videoedit", + "name": "wan2.7-videoedit", + "display_name": "wan2.7-videoedit", "modalities": { "input": [ - "text" + "text", + "video" ] }, "limit": { @@ -216616,13 +215616,12 @@ "type": "chat" }, { - "id": "wan2.7-r2v", - "name": "wan2.7-r2v", - "display_name": "wan2.7-r2v", + "id": "wan2.7-t2v", + "name": "wan2.7-t2v", + "display_name": "wan2.7-t2v", "modalities": { "input": [ - "text", - "video" + "text" ] }, "limit": { @@ -216640,13 +215639,13 @@ "type": "chat" }, { - "id": "wan2.7-i2v", - "name": "wan2.7-i2v", - "display_name": "wan2.7-i2v", + "id": "wan2.7-r2v", + "name": "wan2.7-r2v", + "display_name": "wan2.7-r2v", "modalities": { "input": [ - "image", - "text" + "text", + "video" ] }, "limit": { @@ -220510,44 +219509,6 @@ }, "type": "chat" }, - { - "id": "alicloud-glm-5.2", - "name": "alicloud-glm-5.2", - "display_name": "alicloud-glm-5.2", - "limit": { - "context": 
8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 1.1268, - "output": 3.9438, - "cache_read": 0.2817 - }, - "type": "chat" - }, - { - "id": "baidu-glm-5.2", - "name": "baidu-glm-5.2", - "display_name": "baidu-glm-5.2", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 1.1268, - "output": 3.9438, - "cache_read": 0.2817 - }, - "type": "chat" - }, { "id": "cc-glm-5", "name": "cc-glm-5", @@ -220606,6 +219567,25 @@ }, "type": "chat" }, + { + "id": "cloudflare-glm-5.2", + "name": "cloudflare-glm-5.2", + "display_name": "cloudflare-glm-5.2", + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 1.4, + "output": 4.4002, + "cache_read": 0.2604 + }, + "type": "chat" + }, { "id": "coding-step-3.5-flash-free", "name": "coding-step-3.5-flash-free", @@ -220631,25 +219611,6 @@ }, "type": "chat" }, - { - "id": "deepinfra-glm-5.2", - "name": "deepinfra-glm-5.2", - "display_name": "deepinfra-glm-5.2", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 1.1, - "output": 4.4, - "cache_read": 0.198 - }, - "type": "chat" - }, { "id": "gemini-2.5-flash-image", "name": "gemini-2.5-flash-image", @@ -222008,11 +220969,12 @@ "type": "chat" }, { - "id": "wan2.6-t2v", - "name": "wan2.6-t2v", - "display_name": "wan2.6-t2v", + "id": "wan2.6-i2v", + "name": "wan2.6-i2v", + "display_name": "wan2.6-i2v", "modalities": { "input": [ + "image", "text" ] }, @@ -222031,12 +220993,11 @@ "type": "chat" }, { - "id": "wan2.6-i2v", - "name": "wan2.6-i2v", - "display_name": "wan2.6-i2v", + "id": "wan2.6-t2v", + "name": "wan2.6-t2v", + "display_name": "wan2.6-t2v", "modalities": { "input": [ - "image", "text" ] }, @@ -228291,29 +227252,6 @@ }, "type": "embedding" }, - { - "id": 
"AiHubmix-Phi-4-mini-reasoning", - "name": "AiHubmix-Phi-4-mini-reasoning", - "display_name": "AiHubmix-Phi-4-mini-reasoning", - "modalities": { - "input": [ - "text" - ] - }, - "limit": { - "context": 128000, - "output": 128000 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 0.12, - "output": 0.12 - }, - "type": "chat" - }, { "id": "qwen-turbo-latest", "name": "qwen-turbo-latest", @@ -228344,6 +227282,29 @@ }, "type": "chat" }, + { + "id": "AiHubmix-Phi-4-mini-reasoning", + "name": "AiHubmix-Phi-4-mini-reasoning", + "display_name": "AiHubmix-Phi-4-mini-reasoning", + "modalities": { + "input": [ + "text" + ] + }, + "limit": { + "context": 128000, + "output": 128000 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 0.12, + "output": 0.12 + }, + "type": "chat" + }, { "id": "aihub-Phi-4-multimodal-instruct", "name": "aihub-Phi-4-multimodal-instruct", @@ -228399,24 +227360,6 @@ }, "type": "chat" }, - { - "id": "grok-3", - "name": "grok-3", - "display_name": "grok-3", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 3, - "output": 15 - }, - "type": "chat" - }, { "id": "aihub-Phi-4-mini-instruct", "name": "aihub-Phi-4-mini-instruct", @@ -228440,6 +227383,66 @@ }, "type": "chat" }, + { + "id": "grok-3", + "name": "grok-3", + "display_name": "grok-3", + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 3, + "output": 15 + }, + "type": "chat" + }, + { + "id": "doubao-embedding-text-240715", + "name": "doubao-embedding-text-240715", + "display_name": "doubao-embedding-text-240715", + "modalities": { + "input": [ + "text" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 0.7, + "output": 0.7 + }, + "type": 
"embedding" + }, + { + "id": "grok-3-beta", + "name": "grok-3-beta", + "display_name": "grok-3-beta", + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 3, + "output": 15, + "cache_read": 0 + }, + "type": "chat" + }, { "id": "aihub-Phi-4", "name": "aihub-Phi-4", @@ -228536,32 +227539,39 @@ "type": "chat" }, { - "id": "doubao-embedding-text-240715", - "name": "doubao-embedding-text-240715", - "display_name": "doubao-embedding-text-240715", - "modalities": { - "input": [ - "text" - ] - }, + "id": "qwen3-8b", + "name": "qwen3-8b", + "display_name": "qwen3-8b", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "cost": { - "input": 0.7, - "output": 0.7 + "input": 0.08, + "output": 0.8, + "cache_read": 0 }, - "type": "embedding" + "type": "chat" }, { - "id": "grok-3-beta", - "name": "grok-3-beta", - "display_name": "grok-3-beta", + "id": "grok-3-fast", + "name": "grok-3-fast", + "display_name": "grok-3-fast", "limit": { "context": 8192, "output": 8192 @@ -228571,16 +227581,16 @@ "supported": false }, "cost": { - "input": 3, - "output": 15, + "input": 5.5, + "output": 27.5, "cache_read": 0 }, "type": "chat" }, { - "id": "grok-3-fast", - "name": "grok-3-fast", - "display_name": "grok-3-fast", + "id": "grok-3-fast-beta", + "name": "grok-3-fast-beta", + "display_name": "grok-3-fast-beta", "limit": { "context": 8192, "output": 8192 @@ -228597,31 +227607,20 @@ "type": "chat" }, { - "id": "qwen3-8b", - "name": "qwen3-8b", - "display_name": "qwen3-8b", + "id": "grok-3-mini", + "name": "grok-3-mini", + "display_name": "grok-3-mini", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": true 
- }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, "cost": { - "input": 0.08, - "output": 0.8, + "input": 0.3, + "output": 0.501, "cache_read": 0 }, "type": "chat" @@ -228675,39 +227674,31 @@ "type": "chat" }, { - "id": "grok-3-fast-beta", - "name": "grok-3-fast-beta", - "display_name": "grok-3-fast-beta", + "id": "qwen3-1.7b", + "name": "qwen3-1.7b", + "display_name": "qwen3-1.7b", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": false - }, - "cost": { - "input": 5.5, - "output": 27.5, - "cache_read": 0 - }, - "type": "chat" - }, - { - "id": "grok-3-mini", - "name": "grok-3-mini", - "display_name": "grok-3-mini", - "limit": { - "context": 8192, - "output": 8192 + "supported": true }, - "tool_call": false, - "reasoning": { - "supported": false + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "cost": { - "input": 0.3, - "output": 0.501, + "input": 0.046, + "output": 0.46, "cache_read": 0 }, "type": "chat" @@ -228731,36 +227722,6 @@ }, "type": "chat" }, - { - "id": "qwen3-1.7b", - "name": "qwen3-1.7b", - "display_name": "qwen3-1.7b", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } - }, - "cost": { - "input": 0.046, - "output": 0.46, - "cache_read": 0 - }, - "type": "chat" - }, { "id": "qwen3-0.6b", "name": "qwen3-0.6b", @@ -229095,9 +228056,9 @@ "type": "chat" }, { - "id": "deepseek-ai/Janus-Pro-7B", - "name": "deepseek-ai/Janus-Pro-7B", - "display_name": 
"deepseek-ai/Janus-Pro-7B", + "id": "glm-zero-preview", + "name": "glm-zero-preview", + "display_name": "glm-zero-preview", "limit": { "context": 8192, "output": 8192 @@ -229113,9 +228074,9 @@ "type": "chat" }, { - "id": "glm-zero-preview", - "name": "glm-zero-preview", - "display_name": "glm-zero-preview", + "id": "deepseek-ai/Janus-Pro-7B", + "name": "deepseek-ai/Janus-Pro-7B", + "display_name": "deepseek-ai/Janus-Pro-7B", "limit": { "context": 8192, "output": 8192 @@ -229273,32 +228234,32 @@ "type": "chat" }, { - "id": "glm-4.5-air", - "name": "glm-4.5-air", - "display_name": "glm-4.5-air", + "id": "coding-glm-4.5-air", + "name": "coding-glm-4.5-air", + "display_name": "coding-glm-4.5-air", "modalities": { "input": [ "text" ] }, "limit": { - "context": 131072, - "output": 131072 + "context": 8192, + "output": 8192 }, "tool_call": false, "reasoning": { "supported": false }, "cost": { - "input": 0.14, - "output": 0.84 + "input": 0.014, + "output": 0.084 }, "type": "chat" }, { - "id": "gpt-4-32k", - "name": "gpt-4-32k", - "display_name": "gpt-4-32k", + "id": "deepinfra-nvidia-nemotron-3-nano-30b-a3b2", + "name": "deepinfra-nvidia-nemotron-3-nano-30b-a3b2", + "display_name": "deepinfra-nvidia-nemotron-3-nano-30b-a3b2", "limit": { "context": 8192, "output": 8192 @@ -229308,38 +228269,38 @@ "supported": false }, "cost": { - "input": 60, - "output": 120 + "input": 0.066, + "output": 0.264 }, "type": "chat" }, { - "id": "coding-glm-4.5-air", - "name": "coding-glm-4.5-air", - "display_name": "coding-glm-4.5-air", + "id": "glm-4.5-air", + "name": "glm-4.5-air", + "display_name": "glm-4.5-air", "modalities": { "input": [ "text" ] }, "limit": { - "context": 8192, - "output": 8192 + "context": 131072, + "output": 131072 }, "tool_call": false, "reasoning": { "supported": false }, "cost": { - "input": 0.014, - "output": 0.084 + "input": 0.14, + "output": 0.84 }, "type": "chat" }, { - "id": "deepinfra-nvidia-nemotron-3-nano-30b-a3b2", - "name": 
"deepinfra-nvidia-nemotron-3-nano-30b-a3b2", - "display_name": "deepinfra-nvidia-nemotron-3-nano-30b-a3b2", + "id": "gpt-4-32k", + "name": "gpt-4-32k", + "display_name": "gpt-4-32k", "limit": { "context": 8192, "output": 8192 @@ -229349,8 +228310,8 @@ "supported": false }, "cost": { - "input": 0.066, - "output": 0.264 + "input": 60, + "output": 120 }, "type": "chat" }, @@ -229491,30 +228452,6 @@ }, "type": "chat" }, - { - "id": "wan2.6-t2i", - "name": "wan2.6-t2i", - "display_name": "wan2.6-t2i", - "modalities": { - "input": [ - "image", - "text" - ] - }, - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 2, - "output": 0 - }, - "type": "imageGeneration" - }, { "id": "grok-2-1212", "name": "grok-2-1212", @@ -229534,116 +228471,28 @@ "type": "chat" }, { - "id": "gpt-image-test", - "name": "gpt-image-test", - "display_name": "gpt-image-test", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 5, - "output": 40, - "cache_read": 0 - }, - "type": "chat" - }, - { - "id": "grok-4.20-beta-0309-non-reasoning", - "name": "grok-4.20-beta-0309-non-reasoning", - "display_name": "grok-4.20-beta-0309-non-reasoning", - "modalities": { - "input": [ - "text", - "image" - ] - }, - "limit": { - "context": 2000000, - "output": 2000000 - }, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } - }, - "cost": { - "input": 2, - "output": 6, - "cache_read": 0.2 - }, - "type": "chat" - }, - { - "id": "grok-4.20-beta-0309-reasoning", - "name": "grok-4.20-beta-0309-reasoning", - "display_name": "grok-4.20-beta-0309-reasoning", - "modalities": { - "input": [ - "text", - "image" - ] - }, - "limit": { - "context": 2000000, - "output": 2000000 - }, - "tool_call": true, - "reasoning": { - "supported": true, - "default": true 
- }, - "extra_capabilities": { - "reasoning": { - "supported": true - } - }, - "cost": { - "input": 2, - "output": 6, - "cache_read": 0.2 - }, - "type": "chat" - }, - { - "id": "grok-4.20-multi-agent-beta-0309", - "name": "grok-4.20-multi-agent-beta-0309", - "display_name": "grok-4.20-multi-agent-beta-0309", + "id": "wan2.6-t2i", + "name": "wan2.6-t2i", + "display_name": "wan2.6-t2i", "modalities": { "input": [ - "text", - "image" + "image", + "text" ] }, "limit": { - "context": 2000000, - "output": 2000000 + "context": 8192, + "output": 8192 }, - "tool_call": true, + "tool_call": false, "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } + "supported": false }, "cost": { "input": 2, - "output": 6, - "cache_read": 0.2 + "output": 0 }, - "type": "chat" + "type": "imageGeneration" }, { "id": "imagen-3.0-generate-002", @@ -230005,9 +228854,9 @@ "type": "chat" }, { - "id": "Baichuan3-Turbo", - "name": "Baichuan3-Turbo", - "display_name": "Baichuan3-Turbo", + "id": "gpt-image-test", + "name": "gpt-image-test", + "display_name": "gpt-image-test", "limit": { "context": 8192, "output": 8192 @@ -230017,51 +228866,109 @@ "supported": false }, "cost": { - "input": 1.9, - "output": 1.9 + "input": 5, + "output": 40, + "cache_read": 0 }, "type": "chat" }, { - "id": "Baichuan3-Turbo-128k", - "name": "Baichuan3-Turbo-128k", - "display_name": "Baichuan3-Turbo-128k", + "id": "grok-4.20-beta-0309-non-reasoning", + "name": "grok-4.20-beta-0309-non-reasoning", + "display_name": "grok-4.20-beta-0309-non-reasoning", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { - "context": 8192, - "output": 8192 + "context": 2000000, + "output": 2000000 }, - "tool_call": false, + "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } }, "cost": { - "input": 3.8, - "output": 3.8 + "input": 2, + 
"output": 6, + "cache_read": 0.2 }, "type": "chat" }, { - "id": "Baichuan4", - "name": "Baichuan4", - "display_name": "Baichuan4", + "id": "grok-4.20-beta-0309-reasoning", + "name": "grok-4.20-beta-0309-reasoning", + "display_name": "grok-4.20-beta-0309-reasoning", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { - "context": 8192, - "output": 8192 + "context": 2000000, + "output": 2000000 }, - "tool_call": false, + "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } }, "cost": { - "input": 16, - "output": 16 + "input": 2, + "output": 6, + "cache_read": 0.2 }, "type": "chat" }, { - "id": "Baichuan4-Air", - "name": "Baichuan4-Air", - "display_name": "Baichuan4-Air", + "id": "grok-4.20-multi-agent-beta-0309", + "name": "grok-4.20-multi-agent-beta-0309", + "display_name": "grok-4.20-multi-agent-beta-0309", + "modalities": { + "input": [ + "text", + "image" + ] + }, + "limit": { + "context": 2000000, + "output": 2000000 + }, + "tool_call": true, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "cost": { + "input": 2, + "output": 6, + "cache_read": 0.2 + }, + "type": "chat" + }, + { + "id": "deepseek-ai/deepseek-llm-67b-chat", + "name": "deepseek-ai/deepseek-llm-67b-chat", + "display_name": "deepseek-ai/deepseek-llm-67b-chat", "limit": { "context": 8192, "output": 8192 @@ -230077,9 +228984,9 @@ "type": "chat" }, { - "id": "Baichuan4-Turbo", - "name": "Baichuan4-Turbo", - "display_name": "Baichuan4-Turbo", + "id": "deepseek-ai/deepseek-vl2", + "name": "deepseek-ai/deepseek-vl2", + "display_name": "deepseek-ai/deepseek-vl2", "limit": { "context": 8192, "output": 8192 @@ -230089,15 +228996,15 @@ "supported": false }, "cost": { - "input": 2.4, - "output": 2.4 + "input": 0.16, + "output": 0.16 }, "type": "chat" }, { - "id": "DeepSeek-v3", - "name": 
"DeepSeek-v3", - "display_name": "DeepSeek-v3", + "id": "deepseek-v3", + "name": "deepseek-v3", + "display_name": "deepseek-v3", "limit": { "context": 8192, "output": 8192 @@ -230108,14 +229015,20 @@ }, "cost": { "input": 0.272, - "output": 1.088 + "output": 1.088, + "cache_read": 0 }, "type": "chat" }, { - "id": "Doubao-1.5-lite-32k", - "name": "Doubao-1.5-lite-32k", - "display_name": "Doubao-1.5-lite-32k", + "id": "distil-whisper-large-v3-en", + "name": "distil-whisper-large-v3-en", + "display_name": "distil-whisper-large-v3-en", + "modalities": { + "input": [ + "audio" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -230125,16 +229038,15 @@ "supported": false }, "cost": { - "input": 0.05, - "output": 0.1, - "cache_read": 0.01 + "input": 5.556, + "output": 5.556 }, "type": "chat" }, { - "id": "Doubao-1.5-pro-256k", - "name": "Doubao-1.5-pro-256k", - "display_name": "Doubao-1.5-pro-256k", + "id": "doubao-1-5-thinking-vision-pro-250428", + "name": "doubao-1-5-thinking-vision-pro-250428", + "display_name": "doubao-1-5-thinking-vision-pro-250428", "limit": { "context": 8192, "output": 8192 @@ -230144,16 +229056,16 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 1.44, - "cache_read": 0.8 + "input": 2, + "output": 2, + "cache_read": 2 }, "type": "chat" }, { - "id": "Doubao-1.5-pro-32k", - "name": "Doubao-1.5-pro-32k", - "display_name": "Doubao-1.5-pro-32k", + "id": "fx-flux-2-pro", + "name": "fx-flux-2-pro", + "display_name": "fx-flux-2-pro", "limit": { "context": 8192, "output": 8192 @@ -230163,34 +229075,68 @@ "supported": false }, "cost": { - "input": 0.134, - "output": 0.335, - "cache_read": 0.0268 + "input": 2, + "output": 0, + "cache_read": 0 }, "type": "chat" }, { - "id": "Doubao-1.5-vision-pro-32k", - "name": "Doubao-1.5-vision-pro-32k", - "display_name": "Doubao-1.5-vision-pro-32k", + "id": "gemini-2.5-pro-exp-03-25", + "name": "gemini-2.5-pro-exp-03-25", + "display_name": "gemini-2.5-pro-exp-03-25", + "modalities": { + "input": [ + 
"text", + "image", + "audio", + "video" + ] + }, "limit": { "context": 8192, "output": 8192 }, - "tool_call": false, + "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "default_enabled": true, + "mode": "budget", + "budget": { + "default": -1, + "min": 128, + "max": 32768, + "auto": -1, + "unit": "tokens" + }, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thought_signatures" + ] + } }, "cost": { - "input": 0.46, - "output": 1.38 + "input": 1.25, + "output": 5, + "cache_read": 0.125 }, "type": "chat" }, { - "id": "Doubao-lite-128k", - "name": "Doubao-lite-128k", - "display_name": "Doubao-lite-128k", + "id": "gemini-embedding-exp-03-07", + "name": "gemini-embedding-exp-03-07", + "display_name": "gemini-embedding-exp-03-07", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -230200,16 +229146,15 @@ "supported": false }, "cost": { - "input": 0.14, - "output": 0.28, - "cache_read": 0.14 + "input": 0.02, + "output": 0.02 }, - "type": "chat" + "type": "embedding" }, { - "id": "Doubao-lite-32k", - "name": "Doubao-lite-32k", - "display_name": "Doubao-lite-32k", + "id": "gemini-exp-1114", + "name": "gemini-exp-1114", + "display_name": "gemini-exp-1114", "limit": { "context": 8192, "output": 8192 @@ -230219,16 +229164,15 @@ "supported": false }, "cost": { - "input": 0.06, - "output": 0.12, - "cache_read": 0.012 + "input": 1.25, + "output": 5 }, "type": "chat" }, { - "id": "Doubao-lite-4k", - "name": "Doubao-lite-4k", - "display_name": "Doubao-lite-4k", + "id": "gemini-exp-1121", + "name": "gemini-exp-1121", + "display_name": "gemini-exp-1121", "limit": { "context": 8192, "output": 8192 @@ -230238,16 +229182,15 @@ "supported": false }, "cost": { - "input": 0.06, - "output": 0.12, - "cache_read": 0.06 + "input": 1.25, + "output": 5 }, "type": "chat" }, { - "id": "Doubao-pro-128k", - "name": 
"Doubao-pro-128k", - "display_name": "Doubao-pro-128k", + "id": "gemini-pro", + "name": "gemini-pro", + "display_name": "gemini-pro", "limit": { "context": 8192, "output": 8192 @@ -230257,15 +229200,15 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 1.44 + "input": 0.2, + "output": 0.6 }, "type": "chat" }, { - "id": "Doubao-pro-256k", - "name": "Doubao-pro-256k", - "display_name": "Doubao-pro-256k", + "id": "gemini-pro-vision", + "name": "gemini-pro-vision", + "display_name": "gemini-pro-vision", "limit": { "context": 8192, "output": 8192 @@ -230275,16 +229218,15 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 1.44, - "cache_read": 0.8 + "input": 1, + "output": 1 }, "type": "chat" }, { - "id": "Doubao-pro-32k", - "name": "Doubao-pro-32k", - "display_name": "Doubao-pro-32k", + "id": "gemma-7b-it", + "name": "gemma-7b-it", + "display_name": "gemma-7b-it", "limit": { "context": 8192, "output": 8192 @@ -230294,16 +229236,15 @@ "supported": false }, "cost": { - "input": 0.14, - "output": 0.35, - "cache_read": 0.028 + "input": 0.1, + "output": 0.1 }, "type": "chat" }, { - "id": "Doubao-pro-4k", - "name": "Doubao-pro-4k", - "display_name": "Doubao-pro-4k", + "id": "glm-3-turbo", + "name": "glm-3-turbo", + "display_name": "glm-3-turbo", "limit": { "context": 8192, "output": 8192 @@ -230313,38 +229254,33 @@ "supported": false }, "cost": { - "input": 0.14, - "output": 0.35 + "input": 0.71, + "output": 0.71 }, "type": "chat" }, { - "id": "GPT-OSS-20B", - "name": "GPT-OSS-20B", - "display_name": "GPT-OSS-20B", + "id": "glm-4", + "name": "glm-4", + "display_name": "glm-4", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } + "supported": false }, "cost": { - "input": 0.11, - "output": 0.55 + "input": 14.2, + "output": 14.2 }, "type": "chat" }, { - "id": "Gryphe/MythoMax-L2-13b", - "name": "Gryphe/MythoMax-L2-13b", - 
"display_name": "Gryphe/MythoMax-L2-13b", + "id": "glm-4-flash", + "name": "glm-4-flash", + "display_name": "glm-4-flash", "limit": { "context": 8192, "output": 8192 @@ -230354,20 +229290,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 0.4 + "input": 0.1, + "output": 0.1 }, "type": "chat" }, { - "id": "MiniMax-Text-01", - "name": "MiniMax-Text-01", - "display_name": "MiniMax-Text-01", - "modalities": { - "input": [ - "text" - ] - }, + "id": "glm-4-plus", + "name": "glm-4-plus", + "display_name": "glm-4-plus", "limit": { "context": 8192, "output": 8192 @@ -230377,15 +229308,20 @@ "supported": false }, "cost": { - "input": 0.14, - "output": 1.12 + "input": 8, + "output": 8 }, "type": "chat" }, { - "id": "Mistral-large-2407", - "name": "Mistral-large-2407", - "display_name": "Mistral-large-2407", + "id": "glm-4.5-airx", + "name": "glm-4.5-airx", + "display_name": "glm-4.5-airx", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -230395,15 +229331,16 @@ "supported": false }, "cost": { - "input": 3, - "output": 9 + "input": 1.1, + "output": 4.51, + "cache_read": 0.22 }, "type": "chat" }, { - "id": "Qwen/Qwen2-1.5B-Instruct", - "name": "Qwen/Qwen2-1.5B-Instruct", - "display_name": "Qwen/Qwen2-1.5B-Instruct", + "id": "glm-4v", + "name": "glm-4v", + "display_name": "glm-4v", "limit": { "context": 8192, "output": 8192 @@ -230413,15 +229350,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 14.2, + "output": 14.2 }, "type": "chat" }, { - "id": "Qwen/Qwen2-57B-A14B-Instruct", - "name": "Qwen/Qwen2-57B-A14B-Instruct", - "display_name": "Qwen/Qwen2-57B-A14B-Instruct", + "id": "glm-4v-plus", + "name": "glm-4v-plus", + "display_name": "glm-4v-plus", "limit": { "context": 8192, "output": 8192 @@ -230431,15 +229368,15 @@ "supported": false }, "cost": { - "input": 0.24, - "output": 0.24 + "input": 2, + "output": 2 }, "type": "chat" }, { - "id": "Qwen/Qwen2-72B-Instruct", - "name": 
"Qwen/Qwen2-72B-Instruct", - "display_name": "Qwen/Qwen2-72B-Instruct", + "id": "google-gemma-3-12b-it", + "name": "google-gemma-3-12b-it", + "display_name": "google-gemma-3-12b-it", "limit": { "context": 8192, "output": 8192 @@ -230449,15 +229386,15 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 0.8 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "Qwen/Qwen2-7B-Instruct", - "name": "Qwen/Qwen2-7B-Instruct", - "display_name": "Qwen/Qwen2-7B-Instruct", + "id": "google-gemma-3-27b-it", + "name": "google-gemma-3-27b-it", + "display_name": "google-gemma-3-27b-it", "limit": { "context": 8192, "output": 8192 @@ -230467,15 +229404,16 @@ "supported": false }, "cost": { - "input": 0.08, - "output": 0.08 + "input": 0.2, + "output": 0.2, + "cache_read": 0 }, "type": "chat" }, { - "id": "Qwen/Qwen2.5-32B-Instruct", - "name": "Qwen/Qwen2.5-32B-Instruct", - "display_name": "Qwen/Qwen2.5-32B-Instruct", + "id": "google-gemma-3-4b-it", + "name": "google-gemma-3-4b-it", + "display_name": "google-gemma-3-4b-it", "limit": { "context": 8192, "output": 8192 @@ -230485,15 +229423,16 @@ "supported": false }, "cost": { - "input": 0.6, - "output": 0.6 + "input": 0.2, + "output": 0.2, + "cache_read": 0 }, "type": "chat" }, { - "id": "Qwen/Qwen2.5-72B-Instruct", - "name": "Qwen/Qwen2.5-72B-Instruct", - "display_name": "Qwen/Qwen2.5-72B-Instruct", + "id": "google/gemini-exp-1114", + "name": "google/gemini-exp-1114", + "display_name": "google/gemini-exp-1114", "limit": { "context": 8192, "output": 8192 @@ -230503,15 +229442,15 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 0.8 + "input": 1.25, + "output": 5 }, "type": "chat" }, { - "id": "Qwen/Qwen2.5-72B-Instruct-128K", - "name": "Qwen/Qwen2.5-72B-Instruct-128K", - "display_name": "Qwen/Qwen2.5-72B-Instruct-128K", + "id": "google/gemma-2-27b-it", + "name": "google/gemma-2-27b-it", + "display_name": "google/gemma-2-27b-it", "limit": { "context": 8192, "output": 8192 @@ -230527,9 +229466,9 @@ 
"type": "chat" }, { - "id": "Qwen/Qwen2.5-7B-Instruct", - "name": "Qwen/Qwen2.5-7B-Instruct", - "display_name": "Qwen/Qwen2.5-7B-Instruct", + "id": "google/gemma-2-9b-it:free", + "name": "google/gemma-2-9b-it:free", + "display_name": "google/gemma-2-9b-it:free", "limit": { "context": 8192, "output": 8192 @@ -230539,15 +229478,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 0.4 + "input": 0.02, + "output": 0.02 }, "type": "chat" }, { - "id": "Qwen/Qwen2.5-Coder-32B-Instruct", - "name": "Qwen/Qwen2.5-Coder-32B-Instruct", - "display_name": "Qwen/Qwen2.5-Coder-32B-Instruct", + "id": "gpt-3.5-turbo", + "name": "gpt-3.5-turbo", + "display_name": "gpt-3.5-turbo", "limit": { "context": 8192, "output": 8192 @@ -230557,50 +229496,33 @@ "supported": false }, "cost": { - "input": 0.16, - "output": 0.16 + "input": 0.5, + "output": 1.5 }, "type": "chat" }, { - "id": "Qwen3-235B-A22B-Thinking-2507", - "name": "Qwen3-235B-A22B-Thinking-2507", - "display_name": "Qwen3-235B-A22B-Thinking-2507", + "id": "gpt-3.5-turbo-0301", + "name": "gpt-3.5-turbo-0301", + "display_name": "gpt-3.5-turbo-0301", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, "cost": { - "input": 0.28, - "output": 2.8 + "input": 1.5, + "output": 1.5 }, "type": "chat" }, { - "id": "Stable-Diffusion-3-5-Large", - "name": "Stable-Diffusion-3-5-Large", - "display_name": "Stable-Diffusion-3-5-Large", - "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "gpt-3.5-turbo-0613", + "name": "gpt-3.5-turbo-0613", + "display_name": "gpt-3.5-turbo-0613", "limit": { "context": 8192, "output": 8192 @@ -230610,16 +229532,15 @@ "supported": false }, "cost": { - "input": 4, - "output": 4, - "cache_read": 0 + "input": 1.5, + "output": 2 }, - 
"type": "imageGeneration" + "type": "chat" }, { - "id": "WizardLM/WizardCoder-Python-34B-V1.0", - "name": "WizardLM/WizardCoder-Python-34B-V1.0", - "display_name": "WizardLM/WizardCoder-Python-34B-V1.0", + "id": "gpt-3.5-turbo-1106", + "name": "gpt-3.5-turbo-1106", + "display_name": "gpt-3.5-turbo-1106", "limit": { "context": 8192, "output": 8192 @@ -230629,15 +229550,15 @@ "supported": false }, "cost": { - "input": 0.9, - "output": 0.9 + "input": 1, + "output": 2 }, "type": "chat" }, { - "id": "ahm-Phi-3-5-MoE-instruct", - "name": "ahm-Phi-3-5-MoE-instruct", - "display_name": "ahm-Phi-3-5-MoE-instruct", + "id": "gpt-3.5-turbo-16k", + "name": "gpt-3.5-turbo-16k", + "display_name": "gpt-3.5-turbo-16k", "limit": { "context": 8192, "output": 8192 @@ -230647,15 +229568,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 1.6 + "input": 3, + "output": 4 }, "type": "chat" }, { - "id": "ahm-Phi-3-5-mini-instruct", - "name": "ahm-Phi-3-5-mini-instruct", - "display_name": "ahm-Phi-3-5-mini-instruct", + "id": "gpt-3.5-turbo-16k-0613", + "name": "gpt-3.5-turbo-16k-0613", + "display_name": "gpt-3.5-turbo-16k-0613", "limit": { "context": 8192, "output": 8192 @@ -230665,21 +229586,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 3 + "input": 3, + "output": 4 }, "type": "chat" }, { - "id": "ahm-Phi-3-5-vision-instruct", - "name": "ahm-Phi-3-5-vision-instruct", - "display_name": "ahm-Phi-3-5-vision-instruct", - "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "gpt-3.5-turbo-instruct", + "name": "gpt-3.5-turbo-instruct", + "display_name": "gpt-3.5-turbo-instruct", "limit": { "context": 8192, "output": 8192 @@ -230689,15 +229604,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 1.6 + "input": 1.5, + "output": 2 }, "type": "chat" }, { - "id": "ahm-Phi-3-medium-128k", - "name": "ahm-Phi-3-medium-128k", - "display_name": "ahm-Phi-3-medium-128k", + "id": "gpt-4", + "name": "gpt-4", + "display_name": "gpt-4", "limit": { 
"context": 8192, "output": 8192 @@ -230707,15 +229622,15 @@ "supported": false }, "cost": { - "input": 6, - "output": 18 + "input": 30, + "output": 60 }, "type": "chat" }, { - "id": "ahm-Phi-3-medium-4k", - "name": "ahm-Phi-3-medium-4k", - "display_name": "ahm-Phi-3-medium-4k", + "id": "gpt-4-0125-preview", + "name": "gpt-4-0125-preview", + "display_name": "gpt-4-0125-preview", "limit": { "context": 8192, "output": 8192 @@ -230725,15 +229640,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 3 + "input": 10, + "output": 30 }, "type": "chat" }, { - "id": "ahm-Phi-3-small-128k", - "name": "ahm-Phi-3-small-128k", - "display_name": "ahm-Phi-3-small-128k", + "id": "gpt-4-0314", + "name": "gpt-4-0314", + "display_name": "gpt-4-0314", "limit": { "context": 8192, "output": 8192 @@ -230743,15 +229658,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 3 + "input": 30, + "output": 60 }, "type": "chat" }, { - "id": "aihubmix-Codestral-2501", - "name": "aihubmix-Codestral-2501", - "display_name": "aihubmix-Codestral-2501", + "id": "gpt-4-0613", + "name": "gpt-4-0613", + "display_name": "gpt-4-0613", "limit": { "context": 8192, "output": 8192 @@ -230761,20 +229676,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 1.2 + "input": 30, + "output": 60 }, "type": "chat" }, { - "id": "aihubmix-Cohere-command-r", - "name": "aihubmix-Cohere-command-r", - "display_name": "aihubmix-Cohere-command-r", - "modalities": { - "input": [ - "text" - ] - }, + "id": "gpt-4-1106-preview", + "name": "gpt-4-1106-preview", + "display_name": "gpt-4-1106-preview", "limit": { "context": 8192, "output": 8192 @@ -230784,15 +229694,15 @@ "supported": false }, "cost": { - "input": 0.64, - "output": 1.92 + "input": 10, + "output": 30 }, "type": "chat" }, { - "id": "aihubmix-Jamba-1-5-Large", - "name": "aihubmix-Jamba-1-5-Large", - "display_name": "aihubmix-Jamba-1-5-Large", + "id": "gpt-4-32k-0314", + "name": "gpt-4-32k-0314", + "display_name": "gpt-4-32k-0314", 
"limit": { "context": 8192, "output": 8192 @@ -230802,15 +229712,15 @@ "supported": false }, "cost": { - "input": 2.2, - "output": 8.8 + "input": 60, + "output": 120 }, "type": "chat" }, { - "id": "aihubmix-Llama-3-1-405B-Instruct", - "name": "aihubmix-Llama-3-1-405B-Instruct", - "display_name": "aihubmix-Llama-3-1-405B-Instruct", + "id": "gpt-4-32k-0613", + "name": "gpt-4-32k-0613", + "display_name": "gpt-4-32k-0613", "limit": { "context": 8192, "output": 8192 @@ -230820,15 +229730,15 @@ "supported": false }, "cost": { - "input": 5, - "output": 15 + "input": 60, + "output": 120 }, "type": "chat" }, { - "id": "aihubmix-Llama-3-1-70B-Instruct", - "name": "aihubmix-Llama-3-1-70B-Instruct", - "display_name": "aihubmix-Llama-3-1-70B-Instruct", + "id": "gpt-4-turbo", + "name": "gpt-4-turbo", + "display_name": "gpt-4-turbo", "limit": { "context": 8192, "output": 8192 @@ -230838,15 +229748,15 @@ "supported": false }, "cost": { - "input": 0.6, - "output": 0.78 + "input": 10, + "output": 30 }, "type": "chat" }, { - "id": "aihubmix-Llama-3-1-8B-Instruct", - "name": "aihubmix-Llama-3-1-8B-Instruct", - "display_name": "aihubmix-Llama-3-1-8B-Instruct", + "id": "gpt-4-turbo-2024-04-09", + "name": "gpt-4-turbo-2024-04-09", + "display_name": "gpt-4-turbo-2024-04-09", "limit": { "context": 8192, "output": 8192 @@ -230856,15 +229766,15 @@ "supported": false }, "cost": { - "input": 0.3, - "output": 0.6 + "input": 10, + "output": 30 }, "type": "chat" }, { - "id": "aihubmix-Llama-3-2-11B-Vision", - "name": "aihubmix-Llama-3-2-11B-Vision", - "display_name": "aihubmix-Llama-3-2-11B-Vision", + "id": "gpt-4-turbo-preview", + "name": "gpt-4-turbo-preview", + "display_name": "gpt-4-turbo-preview", "limit": { "context": 8192, "output": 8192 @@ -230874,15 +229784,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 0.4 + "input": 10, + "output": 30 }, "type": "chat" }, { - "id": "aihubmix-Llama-3-2-90B-Vision", - "name": "aihubmix-Llama-3-2-90B-Vision", - "display_name": 
"aihubmix-Llama-3-2-90B-Vision", + "id": "gpt-4-vision-preview", + "name": "gpt-4-vision-preview", + "display_name": "gpt-4-vision-preview", "limit": { "context": 8192, "output": 8192 @@ -230892,33 +229802,40 @@ "supported": false }, "cost": { - "input": 2.4, - "output": 2.4 + "input": 10, + "output": 30 }, "type": "chat" }, { - "id": "aihubmix-Llama-3-70B-Instruct", - "name": "aihubmix-Llama-3-70B-Instruct", - "display_name": "aihubmix-Llama-3-70B-Instruct", + "id": "gpt-4o-2024-05-13", + "name": "gpt-4o-2024-05-13", + "display_name": "gpt-4o-2024-05-13", "limit": { - "context": 8192, - "output": 8192 + "context": 128000, + "output": 128000 }, "tool_call": false, "reasoning": { "supported": false }, "cost": { - "input": 0.7, - "output": 0.7 + "input": 5, + "output": 15, + "cache_read": 5 }, "type": "chat" }, { - "id": "aihubmix-Mistral-large", - "name": "aihubmix-Mistral-large", - "display_name": "aihubmix-Mistral-large", + "id": "gpt-4o-mini-2024-07-18", + "name": "gpt-4o-mini-2024-07-18", + "display_name": "gpt-4o-mini-2024-07-18", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -230928,41 +229845,49 @@ "supported": false }, "cost": { - "input": 4, - "output": 12 + "input": 0.15, + "output": 0.6, + "cache_read": 0.075 }, "type": "chat" }, { - "id": "aihubmix-command-r-08-2024", - "name": "aihubmix-command-r-08-2024", - "display_name": "aihubmix-command-r-08-2024", + "id": "gpt-oss-20b", + "name": "gpt-oss-20b", + "display_name": "gpt-oss-20b", "modalities": { "input": [ "text" ] }, "limit": { - "context": 8192, - "output": 8192 + "context": 128000, + "output": 128000 }, - "tool_call": false, + "tool_call": true, "reasoning": { - "supported": false + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } }, "cost": { - "input": 0.2, - "output": 0.8 + "input": 0.11, + "output": 0.55 }, "type": "chat" }, { - "id": "aihubmix-command-r-plus", - "name": 
"aihubmix-command-r-plus", - "display_name": "aihubmix-command-r-plus", + "id": "grok-2-vision-1212", + "name": "grok-2-vision-1212", + "display_name": "grok-2-vision-1212", "modalities": { "input": [ - "text" + "text", + "image" ] }, "limit": { @@ -230974,18 +229899,19 @@ "supported": false }, "cost": { - "input": 3.84, - "output": 19.2 + "input": 1.8, + "output": 9 }, "type": "chat" }, { - "id": "aihubmix-command-r-plus-08-2024", - "name": "aihubmix-command-r-plus-08-2024", - "display_name": "aihubmix-command-r-plus-08-2024", + "id": "grok-vision-beta", + "name": "grok-vision-beta", + "display_name": "grok-vision-beta", "modalities": { "input": [ - "text" + "text", + "image" ] }, "limit": { @@ -230997,15 +229923,15 @@ "supported": false }, "cost": { - "input": 2.8, - "output": 11.2 + "input": 5.6, + "output": 16.8 }, "type": "chat" }, { - "id": "alicloud-deepseek-v3.2", - "name": "alicloud-deepseek-v3.2", - "display_name": "alicloud-deepseek-v3.2", + "id": "groq-llama-3.1-8b-instant", + "name": "groq-llama-3.1-8b-instant", + "display_name": "groq-llama-3.1-8b-instant", "limit": { "context": 8192, "output": 8192 @@ -231015,16 +229941,15 @@ "supported": false }, "cost": { - "input": 0.274, - "output": 0.411, - "cache_read": 0.0548 + "input": 0.055, + "output": 0.088 }, "type": "chat" }, { - "id": "alicloud-glm-4.7", - "name": "alicloud-glm-4.7", - "display_name": "alicloud-glm-4.7", + "id": "groq-llama-3.3-70b-versatile", + "name": "groq-llama-3.3-70b-versatile", + "display_name": "groq-llama-3.3-70b-versatile", "limit": { "context": 8192, "output": 8192 @@ -231034,16 +229959,15 @@ "supported": false }, "cost": { - "input": 0.41096, - "output": 1.917786, - "cache_read": 0.41096 + "input": 0.649, + "output": 0.869011 }, "type": "chat" }, { - "id": "alicloud-kimi-k2-thinking", - "name": "alicloud-kimi-k2-thinking", - "display_name": "alicloud-kimi-k2-thinking", + "id": "groq-llama-4-maverick-17b-128e-instruct", + "name": "groq-llama-4-maverick-17b-128e-instruct", + 
"display_name": "groq-llama-4-maverick-17b-128e-instruct", "limit": { "context": 8192, "output": 8192 @@ -231053,34 +229977,39 @@ "supported": false }, "cost": { - "input": 0.548, - "output": 2.192 + "input": 0.22, + "output": 0.66 }, "type": "chat" }, { - "id": "alicloud-kimi-k2.5", - "name": "alicloud-kimi-k2.5", - "display_name": "alicloud-kimi-k2.5", + "id": "groq-llama-4-scout-17b-16e-instruct", + "name": "groq-llama-4-scout-17b-16e-instruct", + "display_name": "groq-llama-4-scout-17b-16e-instruct", "limit": { - "context": 256000, - "output": 256000 + "context": 8192, + "output": 8192 }, "tool_call": false, "reasoning": { "supported": false }, "cost": { - "input": 0.548, - "output": 2.877, - "cache_read": 0.0959 + "input": 0.122, + "output": 0.366 }, "type": "chat" }, { - "id": "alicloud-minimax-m2.5", - "name": "alicloud-minimax-m2.5", - "display_name": "alicloud-minimax-m2.5", + "id": "imagen-4.0-generate-preview-05-20", + "name": "imagen-4.0-generate-preview-05-20", + "display_name": "imagen-4.0-generate-preview-05-20", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -231090,47 +230019,39 @@ "supported": false }, "cost": { - "input": 0.2876, - "output": 1.1504, - "cache_read": 0.05752 + "input": 2, + "output": 2, + "cache_read": 0 }, - "type": "chat" + "type": "imageGeneration" }, { - "id": "anthropic-opus-4-6", - "name": "anthropic-opus-4-6", - "display_name": "anthropic-opus-4-6", + "id": "jina-embeddings-v2-base-code", + "name": "jina-embeddings-v2-base-code", + "display_name": "jina-embeddings-v2-base-code", "modalities": { "input": [ - "text", - "image" + "text" ] }, "limit": { - "context": 200000, - "output": 200000 + "context": 8192, + "output": 8192 }, - "tool_call": true, + "tool_call": false, "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } + "supported": false }, "cost": { - "input": 5, - "output": 25, - 
"cache_read": 0.5 + "input": 0.05, + "output": 0.05 }, - "type": "chat" + "type": "embedding" }, { - "id": "azure-deepseek-v3.2", - "name": "azure-deepseek-v3.2", - "display_name": "azure-deepseek-v3.2", + "id": "learnlm-1.5-pro-experimental", + "name": "learnlm-1.5-pro-experimental", + "display_name": "learnlm-1.5-pro-experimental", "limit": { "context": 8192, "output": 8192 @@ -231140,15 +230061,15 @@ "supported": false }, "cost": { - "input": 0.58, - "output": 1.680028 + "input": 1.25, + "output": 5 }, "type": "chat" }, { - "id": "azure-deepseek-v3.2-speciale", - "name": "azure-deepseek-v3.2-speciale", - "display_name": "azure-deepseek-v3.2-speciale", + "id": "llama-3.1-405b-instruct", + "name": "llama-3.1-405b-instruct", + "display_name": "llama-3.1-405b-instruct", "limit": { "context": 8192, "output": 8192 @@ -231158,33 +230079,33 @@ "supported": false }, "cost": { - "input": 0.58, - "output": 1.680028 + "input": 4, + "output": 4 }, "type": "chat" }, { - "id": "azure-kimi-k2.5", - "name": "azure-kimi-k2.5", - "display_name": "azure-kimi-k2.5", + "id": "llama-3.1-405b-reasoning", + "name": "llama-3.1-405b-reasoning", + "display_name": "llama-3.1-405b-reasoning", "limit": { - "context": 256000, - "output": 256000 + "context": 8192, + "output": 8192 }, "tool_call": false, "reasoning": { "supported": false }, "cost": { - "input": 0.6, - "output": 3 + "input": 4, + "output": 4 }, "type": "chat" }, { - "id": "cbs-glm-4.7", - "name": "cbs-glm-4.7", - "display_name": "cbs-glm-4.7", + "id": "llama-3.1-70b-versatile", + "name": "llama-3.1-70b-versatile", + "display_name": "llama-3.1-70b-versatile", "limit": { "context": 8192, "output": 8192 @@ -231194,15 +230115,15 @@ "supported": false }, "cost": { - "input": 2.25, - "output": 2.749995 + "input": 0.6, + "output": 0.6 }, "type": "chat" }, { - "id": "cerebras-llama-3.3-70b", - "name": "cerebras-llama-3.3-70b", - "display_name": "cerebras-llama-3.3-70b", + "id": "llama-3.1-8b-instant", + "name": "llama-3.1-8b-instant", + 
"display_name": "llama-3.1-8b-instant", "limit": { "context": 8192, "output": 8192 @@ -231212,15 +230133,15 @@ "supported": false }, "cost": { - "input": 0.6, + "input": 0.3, "output": 0.6 }, "type": "chat" }, { - "id": "chatglm_lite", - "name": "chatglm_lite", - "display_name": "chatglm_lite", + "id": "llama-3.1-sonar-small-128k-online", + "name": "llama-3.1-sonar-small-128k-online", + "display_name": "llama-3.1-sonar-small-128k-online", "limit": { "context": 8192, "output": 8192 @@ -231230,15 +230151,15 @@ "supported": false }, "cost": { - "input": 0.2858, - "output": 0.2858 + "input": 0.3, + "output": 0.3 }, "type": "chat" }, { - "id": "chatglm_pro", - "name": "chatglm_pro", - "display_name": "chatglm_pro", + "id": "llama-3.2-11b-vision-preview", + "name": "llama-3.2-11b-vision-preview", + "display_name": "llama-3.2-11b-vision-preview", "limit": { "context": 8192, "output": 8192 @@ -231248,15 +230169,15 @@ "supported": false }, "cost": { - "input": 1.4286, - "output": 1.4286 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "chatglm_std", - "name": "chatglm_std", - "display_name": "chatglm_std", + "id": "llama-3.2-1b-preview", + "name": "llama-3.2-1b-preview", + "display_name": "llama-3.2-1b-preview", "limit": { "context": 8192, "output": 8192 @@ -231266,15 +230187,15 @@ "supported": false }, "cost": { - "input": 0.7144, - "output": 0.7144 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "chatglm_turbo", - "name": "chatglm_turbo", - "display_name": "chatglm_turbo", + "id": "llama-3.2-3b-preview", + "name": "llama-3.2-3b-preview", + "display_name": "llama-3.2-3b-preview", "limit": { "context": 8192, "output": 8192 @@ -231284,15 +230205,15 @@ "supported": false }, "cost": { - "input": 0.7144, - "output": 0.7144 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "claude-2", - "name": "claude-2", - "display_name": "claude-2", + "id": "llama-3.2-90b-vision-preview", + "name": "llama-3.2-90b-vision-preview", + "display_name": 
"llama-3.2-90b-vision-preview", "limit": { "context": 8192, "output": 8192 @@ -231302,15 +230223,15 @@ "supported": false }, "cost": { - "input": 8.8, - "output": 8.8 + "input": 2.4, + "output": 2.4 }, "type": "chat" }, { - "id": "claude-2.0", - "name": "claude-2.0", - "display_name": "claude-2.0", + "id": "llama2-70b-4096", + "name": "llama2-70b-4096", + "display_name": "llama2-70b-4096", "limit": { "context": 8192, "output": 8192 @@ -231320,15 +230241,15 @@ "supported": false }, "cost": { - "input": 8.8, - "output": 39.6 + "input": 0.5, + "output": 0.5 }, "type": "chat" }, { - "id": "claude-2.1", - "name": "claude-2.1", - "display_name": "claude-2.1", + "id": "llama2-70b-40960", + "name": "llama2-70b-40960", + "display_name": "llama2-70b-40960", "limit": { "context": 8192, "output": 8192 @@ -231338,21 +230259,15 @@ "supported": false }, "cost": { - "input": 8.8, - "output": 39.6 + "input": 0.5, + "output": 0.5 }, "type": "chat" }, { - "id": "claude-3-haiku-20240229", - "name": "claude-3-haiku-20240229", - "display_name": "claude-3-haiku-20240229", - "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "llama2-7b-2048", + "name": "llama2-7b-2048", + "display_name": "llama2-7b-2048", "limit": { "context": 8192, "output": 8192 @@ -231362,21 +230277,15 @@ "supported": false }, "cost": { - "input": 0.275, - "output": 0.275 + "input": 0.1, + "output": 0.1 }, "type": "chat" }, { - "id": "claude-3-haiku-20240307", - "name": "claude-3-haiku-20240307", - "display_name": "claude-3-haiku-20240307", - "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "llama3-70b-8192", + "name": "llama3-70b-8192", + "display_name": "llama3-70b-8192", "limit": { "context": 8192, "output": 8192 @@ -231386,21 +230295,15 @@ "supported": false }, "cost": { - "input": 0.275, - "output": 1.375 + "input": 0.7, + "output": 0.937288 }, "type": "chat" }, { - "id": "claude-3-sonnet-20240229", - "name": "claude-3-sonnet-20240229", - "display_name": "claude-3-sonnet-20240229", 
- "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "llama3-8b-8192", + "name": "llama3-8b-8192", + "display_name": "llama3-8b-8192", "limit": { "context": 8192, "output": 8192 @@ -231410,15 +230313,15 @@ "supported": false }, "cost": { - "input": 3.3, - "output": 16.5 + "input": 0.06, + "output": 0.12 }, "type": "chat" }, { - "id": "claude-instant-1", - "name": "claude-instant-1", - "display_name": "claude-instant-1", + "id": "llama3-groq-70b-8192-tool-use-preview", + "name": "llama3-groq-70b-8192-tool-use-preview", + "display_name": "llama3-groq-70b-8192-tool-use-preview", "limit": { "context": 8192, "output": 8192 @@ -231428,15 +230331,15 @@ "supported": false }, "cost": { - "input": 1.793, - "output": 1.793 + "input": 0.00089, + "output": 0.00089 }, "type": "chat" }, { - "id": "claude-instant-1.2", - "name": "claude-instant-1.2", - "display_name": "claude-instant-1.2", + "id": "llama3-groq-8b-8192-tool-use-preview", + "name": "llama3-groq-8b-8192-tool-use-preview", + "display_name": "llama3-groq-8b-8192-tool-use-preview", "limit": { "context": 8192, "output": 8192 @@ -231446,15 +230349,15 @@ "supported": false }, "cost": { - "input": 0.88, - "output": 3.96 + "input": 0.00019, + "output": 0.00019 }, "type": "chat" }, { - "id": "code-davinci-edit-001", - "name": "code-davinci-edit-001", - "display_name": "code-davinci-edit-001", + "id": "mai-image-2", + "name": "mai-image-2", + "display_name": "mai-image-2", "limit": { "context": 8192, "output": 8192 @@ -231464,15 +230367,16 @@ "supported": false }, "cost": { - "input": 20, - "output": 20 + "input": 2, + "output": 2, + "cache_read": 0 }, - "type": "chat" + "type": "imageGeneration" }, { - "id": "cogview-3", - "name": "cogview-3", - "display_name": "cogview-3", + "id": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "name": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "display_name": "meta-llama/Llama-3.2-90B-Vision-Instruct", "limit": { "context": 8192, "output": 8192 @@ -231482,15 +230386,15 @@ 
"supported": false }, "cost": { - "input": 35.5, - "output": 35.5 + "input": 0.5, + "output": 0.5 }, "type": "chat" }, { - "id": "cogview-3-plus", - "name": "cogview-3-plus", - "display_name": "cogview-3-plus", + "id": "meta-llama/llama-3.1-405b-instruct:free", + "name": "meta-llama/llama-3.1-405b-instruct:free", + "display_name": "meta-llama/llama-3.1-405b-instruct:free", "limit": { "context": 8192, "output": 8192 @@ -231500,20 +230404,15 @@ "supported": false }, "cost": { - "input": 10, - "output": 10 + "input": 0.02, + "output": 0.02 }, "type": "chat" }, { - "id": "command", - "name": "command", - "display_name": "command", - "modalities": { - "input": [ - "text" - ] - }, + "id": "meta-llama/llama-3.1-70b-instruct:free", + "name": "meta-llama/llama-3.1-70b-instruct:free", + "display_name": "meta-llama/llama-3.1-70b-instruct:free", "limit": { "context": 8192, "output": 8192 @@ -231523,15 +230422,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 2 + "input": 0.02, + "output": 0.02 }, "type": "chat" }, { - "id": "command-light", - "name": "command-light", - "display_name": "command-light", + "id": "meta-llama/llama-3.1-8b-instruct:free", + "name": "meta-llama/llama-3.1-8b-instruct:free", + "display_name": "meta-llama/llama-3.1-8b-instruct:free", "limit": { "context": 8192, "output": 8192 @@ -231541,15 +230440,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 2 + "input": 0.02, + "output": 0.02 }, "type": "chat" }, { - "id": "command-light-nightly", - "name": "command-light-nightly", - "display_name": "command-light-nightly", + "id": "meta-llama/llama-3.2-11b-vision-instruct:free", + "name": "meta-llama/llama-3.2-11b-vision-instruct:free", + "display_name": "meta-llama/llama-3.2-11b-vision-instruct:free", "limit": { "context": 8192, "output": 8192 @@ -231559,15 +230458,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 2 + "input": 0.02, + "output": 0.02 }, "type": "chat" }, { - "id": "command-nightly", - "name": 
"command-nightly", - "display_name": "command-nightly", + "id": "meta-llama/llama-3.2-3b-instruct:free", + "name": "meta-llama/llama-3.2-3b-instruct:free", + "display_name": "meta-llama/llama-3.2-3b-instruct:free", "limit": { "context": 8192, "output": 8192 @@ -231577,20 +230476,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 2 + "input": 0.02, + "output": 0.02 }, "type": "chat" }, { - "id": "command-r", - "name": "command-r", - "display_name": "command-r", - "modalities": { - "input": [ - "text" - ] - }, + "id": "meta/llama-3.1-405b-instruct", + "name": "meta/llama-3.1-405b-instruct", + "display_name": "meta/llama-3.1-405b-instruct", "limit": { "context": 8192, "output": 8192 @@ -231600,20 +230494,15 @@ "supported": false }, "cost": { - "input": 0.64, - "output": 1.92 + "input": 5, + "output": 5 }, "type": "chat" }, { - "id": "command-r-08-2024", - "name": "command-r-08-2024", - "display_name": "command-r-08-2024", - "modalities": { - "input": [ - "text" - ] - }, + "id": "meta/llama3-8B-chat", + "name": "meta/llama3-8B-chat", + "display_name": "meta/llama3-8B-chat", "limit": { "context": 8192, "output": 8192 @@ -231623,20 +230512,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.8 + "input": 0.3, + "output": 0.3 }, "type": "chat" }, { - "id": "command-r-plus", - "name": "command-r-plus", - "display_name": "command-r-plus", - "modalities": { - "input": [ - "text" - ] - }, + "id": "mistralai/mistral-7b-instruct:free", + "name": "mistralai/mistral-7b-instruct:free", + "display_name": "mistralai/mistral-7b-instruct:free", "limit": { "context": 8192, "output": 8192 @@ -231646,20 +230530,15 @@ "supported": false }, "cost": { - "input": 3.84, - "output": 19.2 + "input": 0.002, + "output": 0.002 }, "type": "chat" }, { - "id": "command-r-plus-08-2024", - "name": "command-r-plus-08-2024", - "display_name": "command-r-plus-08-2024", - "modalities": { - "input": [ - "text" - ] - }, + "id": "mm-minimax-m3", + "name": "mm-minimax-m3", + 
"display_name": "mm-minimax-m3", "limit": { "context": 8192, "output": 8192 @@ -231669,21 +230548,34 @@ "supported": false }, "cost": { - "input": 2.8, - "output": 11.2 + "input": 0.288, + "output": 1.152 }, "type": "chat" }, { - "id": "dall-e-2", - "name": "dall-e-2", - "display_name": "dall-e-2", - "modalities": { - "input": [ - "text", - "image" - ] + "id": "moonshot-kimi-k2.5", + "name": "moonshot-kimi-k2.5", + "display_name": "moonshot-kimi-k2.5", + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 0.6, + "output": 3, + "cache_read": 0.105 }, + "type": "chat" + }, + { + "id": "moonshot-v1-128k", + "name": "moonshot-v1-128k", + "display_name": "moonshot-v1-128k", "limit": { "context": 8192, "output": 8192 @@ -231693,15 +230585,15 @@ "supported": false }, "cost": { - "input": 16, - "output": 16 + "input": 10, + "output": 10 }, - "type": "imageGeneration" + "type": "chat" }, { - "id": "davinci", - "name": "davinci", - "display_name": "davinci", + "id": "moonshot-v1-128k-vision-preview", + "name": "moonshot-v1-128k-vision-preview", + "display_name": "moonshot-v1-128k-vision-preview", "limit": { "context": 8192, "output": 8192 @@ -231711,15 +230603,15 @@ "supported": false }, "cost": { - "input": 20, - "output": 20 + "input": 10, + "output": 10 }, "type": "chat" }, { - "id": "davinci-002", - "name": "davinci-002", - "display_name": "davinci-002", + "id": "moonshot-v1-32k", + "name": "moonshot-v1-32k", + "display_name": "moonshot-v1-32k", "limit": { "context": 8192, "output": 8192 @@ -231729,15 +230621,15 @@ "supported": false }, "cost": { - "input": 2, - "output": 2 + "input": 4, + "output": 4 }, "type": "chat" }, { - "id": "deepinfra-llama-3.1-8b-instant", - "name": "deepinfra-llama-3.1-8b-instant", - "display_name": "deepinfra-llama-3.1-8b-instant", + "id": "moonshot-v1-32k-vision-preview", + "name": "moonshot-v1-32k-vision-preview", + "display_name": 
"moonshot-v1-32k-vision-preview", "limit": { "context": 8192, "output": 8192 @@ -231747,15 +230639,15 @@ "supported": false }, "cost": { - "input": 0.033, - "output": 0.054978 + "input": 4, + "output": 4 }, "type": "chat" }, { - "id": "deepinfra-llama-3.3-70b-instant-turbo", - "name": "deepinfra-llama-3.3-70b-instant-turbo", - "display_name": "deepinfra-llama-3.3-70b-instant-turbo", + "id": "moonshot-v1-8k", + "name": "moonshot-v1-8k", + "display_name": "moonshot-v1-8k", "limit": { "context": 8192, "output": 8192 @@ -231765,15 +230657,15 @@ "supported": false }, "cost": { - "input": 0.11, - "output": 0.352 + "input": 2, + "output": 2 }, "type": "chat" }, { - "id": "deepinfra-llama-4-maverick-17b-128e-instruct", - "name": "deepinfra-llama-4-maverick-17b-128e-instruct", - "display_name": "deepinfra-llama-4-maverick-17b-128e-instruct", + "id": "moonshot-v1-8k-vision-preview", + "name": "moonshot-v1-8k-vision-preview", + "display_name": "moonshot-v1-8k-vision-preview", "limit": { "context": 8192, "output": 8192 @@ -231783,15 +230675,15 @@ "supported": false }, "cost": { - "input": 1.65, - "output": 6.6 + "input": 2, + "output": 2 }, "type": "chat" }, { - "id": "deepinfra-llama-4-scout-17b-16e-instruct", - "name": "deepinfra-llama-4-scout-17b-16e-instruct", - "display_name": "deepinfra-llama-4-scout-17b-16e-instruct", + "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "name": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "display_name": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "limit": { "context": 8192, "output": 8192 @@ -231801,16 +230693,50 @@ "supported": false }, "cost": { - "input": 0.088, - "output": 0.33, + "input": 0.5, + "output": 0.5, "cache_read": 0 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-Coder-V2-Instruct", - "name": "deepseek-ai/DeepSeek-Coder-V2-Instruct", - "display_name": "deepseek-ai/DeepSeek-Coder-V2-Instruct", + "id": "o1-mini-2024-09-12", + "name": "o1-mini-2024-09-12", + "display_name": "o1-mini-2024-09-12", + "limit": { + 
"context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": true, + "default": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "default_enabled": true, + "mode": "effort", + "effort": "medium", + "effort_options": [ + "low", + "medium", + "high" + ], + "visibility": "hidden" + } + }, + "cost": { + "input": 3, + "output": 12, + "cache_read": 1.5 + }, + "type": "chat" + }, + { + "id": "omni-moderation-latest", + "name": "omni-moderation-latest", + "display_name": "omni-moderation-latest", "limit": { "context": 8192, "output": 8192 @@ -231820,15 +230746,15 @@ "supported": false }, "cost": { - "input": 0.16, - "output": 0.32 + "input": 0.02, + "output": 0.02 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "id": "qwen-flash", + "name": "qwen-flash", + "display_name": "qwen-flash", "limit": { "context": 8192, "output": 8192 @@ -231839,37 +230765,56 @@ }, "extra_capabilities": { "reasoning": { - "supported": true + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] } }, "cost": { - "input": 0.6, - "output": 0.6 + "input": 0.02, + "output": 0.2, + "cache_read": 0.02 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "id": "qwen-flash-2025-07-28", + "name": "qwen-flash-2025-07-28", + "display_name": "qwen-flash-2025-07-28", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "cost": { - 
"input": 0.01, - "output": 0.01 + "input": 0.02, + "output": 0.2, + "cache_read": 0.02 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "id": "qwen-long", + "name": "qwen-long", + "display_name": "qwen-long", "limit": { "context": 8192, "output": 8192 @@ -231879,15 +230824,15 @@ "supported": false }, "cost": { - "input": 0.01, - "output": 0.01 + "input": 0.1, + "output": 0.4 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "id": "qwen-max", + "name": "qwen-max", + "display_name": "qwen-max", "limit": { "context": 8192, "output": 8192 @@ -231897,15 +230842,15 @@ "supported": false }, "cost": { - "input": 0.1, - "output": 0.1 + "input": 0.38, + "output": 1.52 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "id": "qwen-max-longcontext", + "name": "qwen-max-longcontext", + "display_name": "qwen-max-longcontext", "limit": { "context": 8192, "output": 8192 @@ -231915,51 +230860,85 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 7, + "output": 21 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "id": "qwen-plus", + "name": "qwen-plus", + "display_name": "qwen-plus", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, 
"cost": { - "input": 0.01, - "output": 0.01 + "input": 0.1126, + "output": 1.126, + "cache_read": 0.02252 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V2-Chat", - "name": "deepseek-ai/DeepSeek-V2-Chat", - "display_name": "deepseek-ai/DeepSeek-V2-Chat", + "id": "qwen-turbo", + "name": "qwen-turbo", + "display_name": "qwen-turbo", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "cost": { - "input": 0.16, - "output": 0.32 + "input": 0.046, + "output": 0.092, + "cache_read": 0.0092 }, "type": "chat" }, { - "id": "deepseek-ai/DeepSeek-V2.5", - "name": "deepseek-ai/DeepSeek-V2.5", - "display_name": "deepseek-ai/DeepSeek-V2.5", + "id": "qwen-turbo-2024-11-01", + "name": "qwen-turbo-2024-11-01", + "display_name": "qwen-turbo-2024-11-01", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -231969,15 +230948,15 @@ "supported": false }, "cost": { - "input": 0.16, - "output": 0.32 + "input": 0.046, + "output": 0.092 }, "type": "chat" }, { - "id": "deepseek-ai/deepseek-llm-67b-chat", - "name": "deepseek-ai/deepseek-llm-67b-chat", - "display_name": "deepseek-ai/deepseek-llm-67b-chat", + "id": "qwen2.5-14b-instruct", + "name": "qwen2.5-14b-instruct", + "display_name": "qwen2.5-14b-instruct", "limit": { "context": 8192, "output": 8192 @@ -231987,15 +230966,15 @@ "supported": false }, "cost": { - "input": 0.16, - "output": 0.16 + "input": 0.4, + "output": 1.2 }, "type": "chat" }, { - "id": "deepseek-ai/deepseek-vl2", - "name": "deepseek-ai/deepseek-vl2", - "display_name": "deepseek-ai/deepseek-vl2", + "id": "qwen2.5-32b-instruct", + "name": "qwen2.5-32b-instruct", + "display_name": "qwen2.5-32b-instruct", "limit": { 
"context": 8192, "output": 8192 @@ -232005,15 +230984,15 @@ "supported": false }, "cost": { - "input": 0.16, - "output": 0.16 + "input": 0.6, + "output": 1.2 }, "type": "chat" }, { - "id": "deepseek-v3", - "name": "deepseek-v3", - "display_name": "deepseek-v3", + "id": "qwen2.5-3b-instruct", + "name": "qwen2.5-3b-instruct", + "display_name": "qwen2.5-3b-instruct", "limit": { "context": 8192, "output": 8192 @@ -232023,21 +231002,15 @@ "supported": false }, "cost": { - "input": 0.272, - "output": 1.088, - "cache_read": 0 + "input": 0.4, + "output": 0.8 }, "type": "chat" }, { - "id": "distil-whisper-large-v3-en", - "name": "distil-whisper-large-v3-en", - "display_name": "distil-whisper-large-v3-en", - "modalities": { - "input": [ - "audio" - ] - }, + "id": "qwen2.5-72b-instruct", + "name": "qwen2.5-72b-instruct", + "display_name": "qwen2.5-72b-instruct", "limit": { "context": 8192, "output": 8192 @@ -232047,15 +231020,15 @@ "supported": false }, "cost": { - "input": 5.556, - "output": 5.556 + "input": 0.8, + "output": 2.4 }, "type": "chat" }, { - "id": "doubao-1-5-thinking-vision-pro-250428", - "name": "doubao-1-5-thinking-vision-pro-250428", - "display_name": "doubao-1-5-thinking-vision-pro-250428", + "id": "qwen2.5-7b-instruct", + "name": "qwen2.5-7b-instruct", + "display_name": "qwen2.5-7b-instruct", "limit": { "context": 8192, "output": 8192 @@ -232065,16 +231038,15 @@ "supported": false }, "cost": { - "input": 2, - "output": 2, - "cache_read": 2 + "input": 0.4, + "output": 0.8 }, "type": "chat" }, { - "id": "fx-flux-2-pro", - "name": "fx-flux-2-pro", - "display_name": "fx-flux-2-pro", + "id": "qwen2.5-coder-1.5b-instruct", + "name": "qwen2.5-coder-1.5b-instruct", + "display_name": "qwen2.5-coder-1.5b-instruct", "limit": { "context": 8192, "output": 8192 @@ -232084,68 +231056,33 @@ "supported": false }, "cost": { - "input": 2, - "output": 0, - "cache_read": 0 + "input": 0.2, + "output": 0.4 }, "type": "chat" }, { - "id": "gemini-2.5-pro-exp-03-25", - "name": 
"gemini-2.5-pro-exp-03-25", - "display_name": "gemini-2.5-pro-exp-03-25", - "modalities": { - "input": [ - "text", - "image", - "audio", - "video" - ] - }, + "id": "qwen2.5-coder-7b-instruct", + "name": "qwen2.5-coder-7b-instruct", + "display_name": "qwen2.5-coder-7b-instruct", "limit": { "context": 8192, "output": 8192 }, - "tool_call": true, + "tool_call": false, "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "default_enabled": true, - "mode": "budget", - "budget": { - "default": -1, - "min": 128, - "max": 32768, - "auto": -1, - "unit": "tokens" - }, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thought_signatures" - ] - } + "supported": false }, "cost": { - "input": 1.25, - "output": 5, - "cache_read": 0.125 + "input": 0.2, + "output": 0.4 }, "type": "chat" }, { - "id": "gemini-embedding-exp-03-07", - "name": "gemini-embedding-exp-03-07", - "display_name": "gemini-embedding-exp-03-07", - "modalities": { - "input": [ - "text" - ] - }, + "id": "qwen2.5-math-1.5b-instruct", + "name": "qwen2.5-math-1.5b-instruct", + "display_name": "qwen2.5-math-1.5b-instruct", "limit": { "context": 8192, "output": 8192 @@ -232155,15 +231092,15 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 0.2, + "output": 0.2 }, - "type": "embedding" + "type": "chat" }, { - "id": "gemini-exp-1114", - "name": "gemini-exp-1114", - "display_name": "gemini-exp-1114", + "id": "qwen2.5-math-72b-instruct", + "name": "qwen2.5-math-72b-instruct", + "display_name": "qwen2.5-math-72b-instruct", "limit": { "context": 8192, "output": 8192 @@ -232173,15 +231110,15 @@ "supported": false }, "cost": { - "input": 1.25, - "output": 5 + "input": 0.8, + "output": 2.4 }, "type": "chat" }, { - "id": "gemini-exp-1121", - "name": "gemini-exp-1121", - "display_name": "gemini-exp-1121", + "id": "qwen2.5-math-7b-instruct", + "name": "qwen2.5-math-7b-instruct", + "display_name": 
"qwen2.5-math-7b-instruct", "limit": { "context": 8192, "output": 8192 @@ -232191,15 +231128,15 @@ "supported": false }, "cost": { - "input": 1.25, - "output": 5 + "input": 0.2, + "output": 0.4 }, "type": "chat" }, { - "id": "gemini-pro", - "name": "gemini-pro", - "display_name": "gemini-pro", + "id": "step-2-16k", + "name": "step-2-16k", + "display_name": "step-2-16k", "limit": { "context": 8192, "output": 8192 @@ -232209,15 +231146,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.6 + "input": 2, + "output": 2 }, "type": "chat" }, { - "id": "gemini-pro-vision", - "name": "gemini-pro-vision", - "display_name": "gemini-pro-vision", + "id": "text-ada-001", + "name": "text-ada-001", + "display_name": "text-ada-001", "limit": { "context": 8192, "output": 8192 @@ -232227,15 +231164,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 1 + "input": 0.4, + "output": 0.4 }, "type": "chat" }, { - "id": "gemma-7b-it", - "name": "gemma-7b-it", - "display_name": "gemma-7b-it", + "id": "text-babbage-001", + "name": "text-babbage-001", + "display_name": "text-babbage-001", "limit": { "context": 8192, "output": 8192 @@ -232245,15 +231182,15 @@ "supported": false }, "cost": { - "input": 0.1, - "output": 0.1 + "input": 0.5, + "output": 0.5 }, "type": "chat" }, { - "id": "glm-3-turbo", - "name": "glm-3-turbo", - "display_name": "glm-3-turbo", + "id": "text-curie-001", + "name": "text-curie-001", + "display_name": "text-curie-001", "limit": { "context": 8192, "output": 8192 @@ -232263,15 +231200,15 @@ "supported": false }, "cost": { - "input": 0.71, - "output": 0.71 + "input": 2, + "output": 2 }, "type": "chat" }, { - "id": "glm-4", - "name": "glm-4", - "display_name": "glm-4", + "id": "text-davinci-002", + "name": "text-davinci-002", + "display_name": "text-davinci-002", "limit": { "context": 8192, "output": 8192 @@ -232281,15 +231218,15 @@ "supported": false }, "cost": { - "input": 14.2, - "output": 14.2 + "input": 20, + "output": 20 }, "type": "chat" 
}, { - "id": "glm-4-flash", - "name": "glm-4-flash", - "display_name": "glm-4-flash", + "id": "text-davinci-003", + "name": "text-davinci-003", + "display_name": "text-davinci-003", "limit": { "context": 8192, "output": 8192 @@ -232299,15 +231236,15 @@ "supported": false }, "cost": { - "input": 0.1, - "output": 0.1 + "input": 20, + "output": 20 }, "type": "chat" }, { - "id": "glm-4-plus", - "name": "glm-4-plus", - "display_name": "glm-4-plus", + "id": "text-davinci-edit-001", + "name": "text-davinci-edit-001", + "display_name": "text-davinci-edit-001", "limit": { "context": 8192, "output": 8192 @@ -232317,15 +231254,15 @@ "supported": false }, "cost": { - "input": 8, - "output": 8 + "input": 20, + "output": 20 }, "type": "chat" }, { - "id": "glm-4.5-airx", - "name": "glm-4.5-airx", - "display_name": "glm-4.5-airx", + "id": "text-embedding-3-large", + "name": "text-embedding-3-large", + "display_name": "text-embedding-3-large", "modalities": { "input": [ "text" @@ -232340,16 +231277,20 @@ "supported": false }, "cost": { - "input": 1.1, - "output": 4.51, - "cache_read": 0.22 + "input": 0.13, + "output": 0.13 }, - "type": "chat" + "type": "embedding" }, { - "id": "glm-4v", - "name": "glm-4v", - "display_name": "glm-4v", + "id": "text-embedding-3-small", + "name": "text-embedding-3-small", + "display_name": "text-embedding-3-small", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -232359,15 +231300,20 @@ "supported": false }, "cost": { - "input": 14.2, - "output": 14.2 + "input": 0.02, + "output": 0.02 }, - "type": "chat" + "type": "embedding" }, { - "id": "glm-4v-plus", - "name": "glm-4v-plus", - "display_name": "glm-4v-plus", + "id": "text-embedding-ada-002", + "name": "text-embedding-ada-002", + "display_name": "text-embedding-ada-002", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -232377,15 +231323,20 @@ "supported": false }, "cost": { - "input": 2, - "output": 2 + 
"input": 0.1, + "output": 0.1 }, - "type": "chat" + "type": "embedding" }, { - "id": "google-gemma-3-12b-it", - "name": "google-gemma-3-12b-it", - "display_name": "google-gemma-3-12b-it", + "id": "text-embedding-v1", + "name": "text-embedding-v1", + "display_name": "text-embedding-v1", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -232395,15 +231346,20 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.1, + "output": 0.1 }, - "type": "chat" + "type": "embedding" }, { - "id": "google-gemma-3-27b-it", - "name": "google-gemma-3-27b-it", - "display_name": "google-gemma-3-27b-it", + "id": "tts-1-hd-1106", + "name": "tts-1-hd-1106", + "display_name": "tts-1-hd-1106", + "modalities": { + "input": [ + "audio" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -232413,16 +231369,41 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2, - "cache_read": 0 + "input": 30, + "output": 30 + } + }, + { + "id": "tts-1-hd", + "name": "tts-1-hd", + "display_name": "tts-1-hd", + "modalities": { + "input": [ + "audio" + ] }, - "type": "chat" + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 30, + "output": 30 + } }, { - "id": "google-gemma-3-4b-it", - "name": "google-gemma-3-4b-it", - "display_name": "google-gemma-3-4b-it", + "id": "whisper-1", + "name": "whisper-1", + "display_name": "whisper-1", + "modalities": { + "input": [ + "audio" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -232432,16 +231413,20 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2, - "cache_read": 0 + "input": 100, + "output": 100 }, "type": "chat" }, { - "id": "google/gemini-exp-1114", - "name": "google/gemini-exp-1114", - "display_name": "google/gemini-exp-1114", + "id": "whisper-large-v3", + "name": "whisper-large-v3", + "display_name": "whisper-large-v3", + "modalities": { + "input": [ + "audio" + ] 
+ }, "limit": { "context": 8192, "output": 8192 @@ -232451,15 +231436,20 @@ "supported": false }, "cost": { - "input": 1.25, - "output": 5 + "input": 30.834, + "output": 30.834 }, "type": "chat" }, { - "id": "google/gemma-2-27b-it", - "name": "google/gemma-2-27b-it", - "display_name": "google/gemma-2-27b-it", + "id": "whisper-large-v3-turbo", + "name": "whisper-large-v3-turbo", + "display_name": "whisper-large-v3-turbo", + "modalities": { + "input": [ + "audio" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -232469,15 +231459,20 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 0.8 + "input": 5.556, + "output": 5.556 }, "type": "chat" }, { - "id": "google/gemma-2-9b-it:free", - "name": "google/gemma-2-9b-it:free", - "display_name": "google/gemma-2-9b-it:free", + "id": "tts-1-1106", + "name": "tts-1-1106", + "display_name": "tts-1-1106", + "modalities": { + "input": [ + "audio" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -232487,15 +231482,36 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 15, + "output": 15 + } + }, + { + "id": "tts-1", + "name": "tts-1", + "display_name": "tts-1", + "modalities": { + "input": [ + "audio" + ] }, - "type": "chat" + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 15, + "output": 15 + } }, { - "id": "gpt-3.5-turbo", - "name": "gpt-3.5-turbo", - "display_name": "gpt-3.5-turbo", + "id": "text-search-ada-doc-001", + "name": "text-search-ada-doc-001", + "display_name": "text-search-ada-doc-001", "limit": { "context": 8192, "output": 8192 @@ -232505,15 +231521,15 @@ "supported": false }, "cost": { - "input": 0.5, - "output": 1.5 + "input": 20, + "output": 20 }, "type": "chat" }, { - "id": "gpt-3.5-turbo-0301", - "name": "gpt-3.5-turbo-0301", - "display_name": "gpt-3.5-turbo-0301", + "id": "text-moderation-stable", + "name": "text-moderation-stable", + "display_name": 
"text-moderation-stable", "limit": { "context": 8192, "output": 8192 @@ -232523,15 +231539,15 @@ "supported": false }, "cost": { - "input": 1.5, - "output": 1.5 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "gpt-3.5-turbo-0613", - "name": "gpt-3.5-turbo-0613", - "display_name": "gpt-3.5-turbo-0613", + "id": "text-moderation-latest", + "name": "text-moderation-latest", + "display_name": "text-moderation-latest", "limit": { "context": 8192, "output": 8192 @@ -232541,15 +231557,15 @@ "supported": false }, "cost": { - "input": 1.5, - "output": 2 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "gpt-3.5-turbo-1106", - "name": "gpt-3.5-turbo-1106", - "display_name": "gpt-3.5-turbo-1106", + "id": "yi-large", + "name": "yi-large", + "display_name": "yi-large", "limit": { "context": 8192, "output": 8192 @@ -232559,15 +231575,15 @@ "supported": false }, "cost": { - "input": 1, - "output": 2 + "input": 3, + "output": 3 }, "type": "chat" }, { - "id": "gpt-3.5-turbo-16k", - "name": "gpt-3.5-turbo-16k", - "display_name": "gpt-3.5-turbo-16k", + "id": "yi-large-rag", + "name": "yi-large-rag", + "display_name": "yi-large-rag", "limit": { "context": 8192, "output": 8192 @@ -232577,15 +231593,15 @@ "supported": false }, "cost": { - "input": 3, + "input": 4, "output": 4 }, "type": "chat" }, { - "id": "gpt-3.5-turbo-16k-0613", - "name": "gpt-3.5-turbo-16k-0613", - "display_name": "gpt-3.5-turbo-16k-0613", + "id": "yi-large-turbo", + "name": "yi-large-turbo", + "display_name": "yi-large-turbo", "limit": { "context": 8192, "output": 8192 @@ -232595,15 +231611,15 @@ "supported": false }, "cost": { - "input": 3, - "output": 4 + "input": 1.8, + "output": 1.8 }, "type": "chat" }, { - "id": "gpt-3.5-turbo-instruct", - "name": "gpt-3.5-turbo-instruct", - "display_name": "gpt-3.5-turbo-instruct", + "id": "yi-lightning", + "name": "yi-lightning", + "display_name": "yi-lightning", "limit": { "context": 8192, "output": 8192 @@ -232613,15 +231629,15 @@ "supported": 
false }, "cost": { - "input": 1.5, - "output": 2 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "gpt-4", - "name": "gpt-4", - "display_name": "gpt-4", + "id": "yi-medium", + "name": "yi-medium", + "display_name": "yi-medium", "limit": { "context": 8192, "output": 8192 @@ -232631,15 +231647,15 @@ "supported": false }, "cost": { - "input": 30, - "output": 60 + "input": 0.4, + "output": 0.4 }, "type": "chat" }, { - "id": "gpt-4-0125-preview", - "name": "gpt-4-0125-preview", - "display_name": "gpt-4-0125-preview", + "id": "yi-vl-plus", + "name": "yi-vl-plus", + "display_name": "yi-vl-plus", "limit": { "context": 8192, "output": 8192 @@ -232649,15 +231665,15 @@ "supported": false }, "cost": { - "input": 10, - "output": 30 + "input": 0.000852, + "output": 0.000852 }, "type": "chat" }, { - "id": "gpt-4-0314", - "name": "gpt-4-0314", - "display_name": "gpt-4-0314", + "id": "text-moderation-007", + "name": "text-moderation-007", + "display_name": "text-moderation-007", "limit": { "context": 8192, "output": 8192 @@ -232667,15 +231683,15 @@ "supported": false }, "cost": { - "input": 30, - "output": 60 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "gpt-4-0613", - "name": "gpt-4-0613", - "display_name": "gpt-4-0613", + "id": "Baichuan3-Turbo", + "name": "Baichuan3-Turbo", + "display_name": "Baichuan3-Turbo", "limit": { "context": 8192, "output": 8192 @@ -232685,15 +231701,15 @@ "supported": false }, "cost": { - "input": 30, - "output": 60 + "input": 1.9, + "output": 1.9 }, "type": "chat" }, { - "id": "gpt-4-1106-preview", - "name": "gpt-4-1106-preview", - "display_name": "gpt-4-1106-preview", + "id": "Baichuan3-Turbo-128k", + "name": "Baichuan3-Turbo-128k", + "display_name": "Baichuan3-Turbo-128k", "limit": { "context": 8192, "output": 8192 @@ -232703,15 +231719,15 @@ "supported": false }, "cost": { - "input": 10, - "output": 30 + "input": 3.8, + "output": 3.8 }, "type": "chat" }, { - "id": "gpt-4-32k-0314", - "name": "gpt-4-32k-0314", - 
"display_name": "gpt-4-32k-0314", + "id": "Baichuan4", + "name": "Baichuan4", + "display_name": "Baichuan4", "limit": { "context": 8192, "output": 8192 @@ -232721,15 +231737,15 @@ "supported": false }, "cost": { - "input": 60, - "output": 120 + "input": 16, + "output": 16 }, "type": "chat" }, { - "id": "gpt-4-32k-0613", - "name": "gpt-4-32k-0613", - "display_name": "gpt-4-32k-0613", + "id": "Baichuan4-Air", + "name": "Baichuan4-Air", + "display_name": "Baichuan4-Air", "limit": { "context": 8192, "output": 8192 @@ -232739,15 +231755,15 @@ "supported": false }, "cost": { - "input": 60, - "output": 120 + "input": 0.16, + "output": 0.16 }, "type": "chat" }, { - "id": "gpt-4-turbo", - "name": "gpt-4-turbo", - "display_name": "gpt-4-turbo", + "id": "Baichuan4-Turbo", + "name": "Baichuan4-Turbo", + "display_name": "Baichuan4-Turbo", "limit": { "context": 8192, "output": 8192 @@ -232757,15 +231773,15 @@ "supported": false }, "cost": { - "input": 10, - "output": 30 + "input": 2.4, + "output": 2.4 }, "type": "chat" }, { - "id": "gpt-4-turbo-2024-04-09", - "name": "gpt-4-turbo-2024-04-09", - "display_name": "gpt-4-turbo-2024-04-09", + "id": "DeepSeek-v3", + "name": "DeepSeek-v3", + "display_name": "DeepSeek-v3", "limit": { "context": 8192, "output": 8192 @@ -232775,15 +231791,15 @@ "supported": false }, "cost": { - "input": 10, - "output": 30 + "input": 0.272, + "output": 1.088 }, "type": "chat" }, { - "id": "gpt-4-turbo-preview", - "name": "gpt-4-turbo-preview", - "display_name": "gpt-4-turbo-preview", + "id": "Doubao-1.5-lite-32k", + "name": "Doubao-1.5-lite-32k", + "display_name": "Doubao-1.5-lite-32k", "limit": { "context": 8192, "output": 8192 @@ -232793,15 +231809,16 @@ "supported": false }, "cost": { - "input": 10, - "output": 30 + "input": 0.05, + "output": 0.1, + "cache_read": 0.01 }, "type": "chat" }, { - "id": "gpt-4-vision-preview", - "name": "gpt-4-vision-preview", - "display_name": "gpt-4-vision-preview", + "id": "Doubao-1.5-pro-256k", + "name": 
"Doubao-1.5-pro-256k", + "display_name": "Doubao-1.5-pro-256k", "limit": { "context": 8192, "output": 8192 @@ -232811,40 +231828,35 @@ "supported": false }, "cost": { - "input": 10, - "output": 30 + "input": 0.8, + "output": 1.44, + "cache_read": 0.8 }, "type": "chat" }, { - "id": "gpt-4o-2024-05-13", - "name": "gpt-4o-2024-05-13", - "display_name": "gpt-4o-2024-05-13", + "id": "Doubao-1.5-pro-32k", + "name": "Doubao-1.5-pro-32k", + "display_name": "Doubao-1.5-pro-32k", "limit": { - "context": 128000, - "output": 128000 + "context": 8192, + "output": 8192 }, "tool_call": false, "reasoning": { "supported": false }, "cost": { - "input": 5, - "output": 15, - "cache_read": 5 + "input": 0.134, + "output": 0.335, + "cache_read": 0.0268 }, "type": "chat" }, { - "id": "gpt-4o-mini-2024-07-18", - "name": "gpt-4o-mini-2024-07-18", - "display_name": "gpt-4o-mini-2024-07-18", - "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "Doubao-1.5-vision-pro-32k", + "name": "Doubao-1.5-vision-pro-32k", + "display_name": "Doubao-1.5-vision-pro-32k", "limit": { "context": 8192, "output": 8192 @@ -232854,51 +231866,34 @@ "supported": false }, "cost": { - "input": 0.15, - "output": 0.6, - "cache_read": 0.075 + "input": 0.46, + "output": 1.38 }, "type": "chat" }, { - "id": "gpt-oss-20b", - "name": "gpt-oss-20b", - "display_name": "gpt-oss-20b", - "modalities": { - "input": [ - "text" - ] - }, + "id": "Doubao-lite-128k", + "name": "Doubao-lite-128k", + "display_name": "Doubao-lite-128k", "limit": { - "context": 128000, - "output": 128000 + "context": 8192, + "output": 8192 }, - "tool_call": true, + "tool_call": false, "reasoning": { - "supported": true, - "default": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true - } + "supported": false }, "cost": { - "input": 0.11, - "output": 0.55 + "input": 0.14, + "output": 0.28, + "cache_read": 0.14 }, "type": "chat" }, { - "id": "grok-2-vision-1212", - "name": "grok-2-vision-1212", - "display_name": 
"grok-2-vision-1212", - "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "Doubao-lite-32k", + "name": "Doubao-lite-32k", + "display_name": "Doubao-lite-32k", "limit": { "context": 8192, "output": 8192 @@ -232908,21 +231903,16 @@ "supported": false }, "cost": { - "input": 1.8, - "output": 9 + "input": 0.06, + "output": 0.12, + "cache_read": 0.012 }, "type": "chat" }, { - "id": "grok-vision-beta", - "name": "grok-vision-beta", - "display_name": "grok-vision-beta", - "modalities": { - "input": [ - "text", - "image" - ] - }, + "id": "Doubao-lite-4k", + "name": "Doubao-lite-4k", + "display_name": "Doubao-lite-4k", "limit": { "context": 8192, "output": 8192 @@ -232932,15 +231922,16 @@ "supported": false }, "cost": { - "input": 5.6, - "output": 16.8 + "input": 0.06, + "output": 0.12, + "cache_read": 0.06 }, "type": "chat" }, { - "id": "groq-llama-3.1-8b-instant", - "name": "groq-llama-3.1-8b-instant", - "display_name": "groq-llama-3.1-8b-instant", + "id": "Doubao-pro-128k", + "name": "Doubao-pro-128k", + "display_name": "Doubao-pro-128k", "limit": { "context": 8192, "output": 8192 @@ -232950,15 +231941,15 @@ "supported": false }, "cost": { - "input": 0.055, - "output": 0.088 + "input": 0.8, + "output": 1.44 }, "type": "chat" }, { - "id": "groq-llama-3.3-70b-versatile", - "name": "groq-llama-3.3-70b-versatile", - "display_name": "groq-llama-3.3-70b-versatile", + "id": "Doubao-pro-256k", + "name": "Doubao-pro-256k", + "display_name": "Doubao-pro-256k", "limit": { "context": 8192, "output": 8192 @@ -232968,15 +231959,16 @@ "supported": false }, "cost": { - "input": 0.649, - "output": 0.869011 + "input": 0.8, + "output": 1.44, + "cache_read": 0.8 }, "type": "chat" }, { - "id": "groq-llama-4-maverick-17b-128e-instruct", - "name": "groq-llama-4-maverick-17b-128e-instruct", - "display_name": "groq-llama-4-maverick-17b-128e-instruct", + "id": "Doubao-pro-32k", + "name": "Doubao-pro-32k", + "display_name": "Doubao-pro-32k", "limit": { "context": 8192, "output": 
8192 @@ -232986,15 +231978,16 @@ "supported": false }, "cost": { - "input": 0.22, - "output": 0.66 + "input": 0.14, + "output": 0.35, + "cache_read": 0.028 }, "type": "chat" }, { - "id": "groq-llama-4-scout-17b-16e-instruct", - "name": "groq-llama-4-scout-17b-16e-instruct", - "display_name": "groq-llama-4-scout-17b-16e-instruct", + "id": "Doubao-pro-4k", + "name": "Doubao-pro-4k", + "display_name": "Doubao-pro-4k", "limit": { "context": 8192, "output": 8192 @@ -233004,21 +231997,38 @@ "supported": false }, "cost": { - "input": 0.122, - "output": 0.366 + "input": 0.14, + "output": 0.35 }, "type": "chat" }, { - "id": "imagen-4.0-generate-preview-05-20", - "name": "imagen-4.0-generate-preview-05-20", - "display_name": "imagen-4.0-generate-preview-05-20", - "modalities": { - "input": [ - "text", - "image" - ] + "id": "GPT-OSS-20B", + "name": "GPT-OSS-20B", + "display_name": "GPT-OSS-20B", + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } }, + "cost": { + "input": 0.11, + "output": 0.55 + }, + "type": "chat" + }, + { + "id": "Gryphe/MythoMax-L2-13b", + "name": "Gryphe/MythoMax-L2-13b", + "display_name": "Gryphe/MythoMax-L2-13b", "limit": { "context": 8192, "output": 8192 @@ -233028,16 +232038,15 @@ "supported": false }, "cost": { - "input": 2, - "output": 2, - "cache_read": 0 + "input": 0.4, + "output": 0.4 }, - "type": "imageGeneration" + "type": "chat" }, { - "id": "jina-embeddings-v2-base-code", - "name": "jina-embeddings-v2-base-code", - "display_name": "jina-embeddings-v2-base-code", + "id": "MiniMax-Text-01", + "name": "MiniMax-Text-01", + "display_name": "MiniMax-Text-01", "modalities": { "input": [ "text" @@ -233052,15 +232061,15 @@ "supported": false }, "cost": { - "input": 0.05, - "output": 0.05 + "input": 0.14, + "output": 1.12 }, - "type": "embedding" + "type": "chat" }, { - "id": "learnlm-1.5-pro-experimental", - "name": 
"learnlm-1.5-pro-experimental", - "display_name": "learnlm-1.5-pro-experimental", + "id": "Mistral-large-2407", + "name": "Mistral-large-2407", + "display_name": "Mistral-large-2407", "limit": { "context": 8192, "output": 8192 @@ -233070,15 +232079,15 @@ "supported": false }, "cost": { - "input": 1.25, - "output": 5 + "input": 3, + "output": 9 }, "type": "chat" }, { - "id": "llama-3.1-405b-instruct", - "name": "llama-3.1-405b-instruct", - "display_name": "llama-3.1-405b-instruct", + "id": "Qwen/Qwen2-1.5B-Instruct", + "name": "Qwen/Qwen2-1.5B-Instruct", + "display_name": "Qwen/Qwen2-1.5B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233088,15 +232097,15 @@ "supported": false }, "cost": { - "input": 4, - "output": 4 + "input": 0.2, + "output": 0.2 }, "type": "chat" }, { - "id": "llama-3.1-405b-reasoning", - "name": "llama-3.1-405b-reasoning", - "display_name": "llama-3.1-405b-reasoning", + "id": "Qwen/Qwen2-57B-A14B-Instruct", + "name": "Qwen/Qwen2-57B-A14B-Instruct", + "display_name": "Qwen/Qwen2-57B-A14B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233106,15 +232115,15 @@ "supported": false }, "cost": { - "input": 4, - "output": 4 + "input": 0.24, + "output": 0.24 }, "type": "chat" }, { - "id": "llama-3.1-70b-versatile", - "name": "llama-3.1-70b-versatile", - "display_name": "llama-3.1-70b-versatile", + "id": "Qwen/Qwen2-72B-Instruct", + "name": "Qwen/Qwen2-72B-Instruct", + "display_name": "Qwen/Qwen2-72B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233124,15 +232133,15 @@ "supported": false }, "cost": { - "input": 0.6, - "output": 0.6 + "input": 0.8, + "output": 0.8 }, "type": "chat" }, { - "id": "llama-3.1-8b-instant", - "name": "llama-3.1-8b-instant", - "display_name": "llama-3.1-8b-instant", + "id": "Qwen/Qwen2-7B-Instruct", + "name": "Qwen/Qwen2-7B-Instruct", + "display_name": "Qwen/Qwen2-7B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233142,15 +232151,15 @@ "supported": false }, "cost": { - "input": 0.3, - 
"output": 0.6 + "input": 0.08, + "output": 0.08 }, "type": "chat" }, { - "id": "llama-3.1-sonar-small-128k-online", - "name": "llama-3.1-sonar-small-128k-online", - "display_name": "llama-3.1-sonar-small-128k-online", + "id": "Qwen/Qwen2.5-32B-Instruct", + "name": "Qwen/Qwen2.5-32B-Instruct", + "display_name": "Qwen/Qwen2.5-32B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233160,15 +232169,15 @@ "supported": false }, "cost": { - "input": 0.3, - "output": 0.3 + "input": 0.6, + "output": 0.6 }, "type": "chat" }, { - "id": "llama-3.2-11b-vision-preview", - "name": "llama-3.2-11b-vision-preview", - "display_name": "llama-3.2-11b-vision-preview", + "id": "Qwen/Qwen2.5-72B-Instruct", + "name": "Qwen/Qwen2.5-72B-Instruct", + "display_name": "Qwen/Qwen2.5-72B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233178,15 +232187,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.8, + "output": 0.8 }, "type": "chat" }, { - "id": "llama-3.2-1b-preview", - "name": "llama-3.2-1b-preview", - "display_name": "llama-3.2-1b-preview", + "id": "Qwen/Qwen2.5-72B-Instruct-128K", + "name": "Qwen/Qwen2.5-72B-Instruct-128K", + "display_name": "Qwen/Qwen2.5-72B-Instruct-128K", "limit": { "context": 8192, "output": 8192 @@ -233196,15 +232205,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.8, + "output": 0.8 }, "type": "chat" }, { - "id": "llama-3.2-3b-preview", - "name": "llama-3.2-3b-preview", - "display_name": "llama-3.2-3b-preview", + "id": "Qwen/Qwen2.5-7B-Instruct", + "name": "Qwen/Qwen2.5-7B-Instruct", + "display_name": "Qwen/Qwen2.5-7B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233214,15 +232223,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.4, + "output": 0.4 }, "type": "chat" }, { - "id": "llama-3.2-90b-vision-preview", - "name": "llama-3.2-90b-vision-preview", - "display_name": "llama-3.2-90b-vision-preview", + "id": 
"Qwen/Qwen2.5-Coder-32B-Instruct", + "name": "Qwen/Qwen2.5-Coder-32B-Instruct", + "display_name": "Qwen/Qwen2.5-Coder-32B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233232,33 +232241,50 @@ "supported": false }, "cost": { - "input": 2.4, - "output": 2.4 + "input": 0.16, + "output": 0.16 }, "type": "chat" }, { - "id": "llama2-70b-4096", - "name": "llama2-70b-4096", - "display_name": "llama2-70b-4096", + "id": "Qwen3-235B-A22B-Thinking-2507", + "name": "Qwen3-235B-A22B-Thinking-2507", + "display_name": "Qwen3-235B-A22B-Thinking-2507", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": false + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true, + "interleaved": true, + "summaries": true, + "visibility": "summary", + "continuation": [ + "thinking_blocks" + ] + } }, "cost": { - "input": 0.5, - "output": 0.5 + "input": 0.28, + "output": 2.8 }, "type": "chat" }, { - "id": "llama2-70b-40960", - "name": "llama2-70b-40960", - "display_name": "llama2-70b-40960", + "id": "Stable-Diffusion-3-5-Large", + "name": "Stable-Diffusion-3-5-Large", + "display_name": "Stable-Diffusion-3-5-Large", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -233268,15 +232294,16 @@ "supported": false }, "cost": { - "input": 0.5, - "output": 0.5 + "input": 4, + "output": 4, + "cache_read": 0 }, - "type": "chat" + "type": "imageGeneration" }, { - "id": "llama2-7b-2048", - "name": "llama2-7b-2048", - "display_name": "llama2-7b-2048", + "id": "WizardLM/WizardCoder-Python-34B-V1.0", + "name": "WizardLM/WizardCoder-Python-34B-V1.0", + "display_name": "WizardLM/WizardCoder-Python-34B-V1.0", "limit": { "context": 8192, "output": 8192 @@ -233286,15 +232313,15 @@ "supported": false }, "cost": { - "input": 0.1, - "output": 0.1 + "input": 0.9, + "output": 0.9 }, "type": "chat" }, { - "id": "llama3-70b-8192", - "name": "llama3-70b-8192", - "display_name": 
"llama3-70b-8192", + "id": "ahm-Phi-3-5-MoE-instruct", + "name": "ahm-Phi-3-5-MoE-instruct", + "display_name": "ahm-Phi-3-5-MoE-instruct", "limit": { "context": 8192, "output": 8192 @@ -233304,15 +232331,15 @@ "supported": false }, "cost": { - "input": 0.7, - "output": 0.937288 + "input": 0.4, + "output": 1.6 }, "type": "chat" }, { - "id": "llama3-8b-8192", - "name": "llama3-8b-8192", - "display_name": "llama3-8b-8192", + "id": "ahm-Phi-3-5-mini-instruct", + "name": "ahm-Phi-3-5-mini-instruct", + "display_name": "ahm-Phi-3-5-mini-instruct", "limit": { "context": 8192, "output": 8192 @@ -233322,15 +232349,21 @@ "supported": false }, "cost": { - "input": 0.06, - "output": 0.12 + "input": 1, + "output": 3 }, "type": "chat" }, { - "id": "llama3-groq-70b-8192-tool-use-preview", - "name": "llama3-groq-70b-8192-tool-use-preview", - "display_name": "llama3-groq-70b-8192-tool-use-preview", + "id": "ahm-Phi-3-5-vision-instruct", + "name": "ahm-Phi-3-5-vision-instruct", + "display_name": "ahm-Phi-3-5-vision-instruct", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -233340,15 +232373,15 @@ "supported": false }, "cost": { - "input": 0.00089, - "output": 0.00089 + "input": 0.4, + "output": 1.6 }, "type": "chat" }, { - "id": "llama3-groq-8b-8192-tool-use-preview", - "name": "llama3-groq-8b-8192-tool-use-preview", - "display_name": "llama3-groq-8b-8192-tool-use-preview", + "id": "ahm-Phi-3-medium-128k", + "name": "ahm-Phi-3-medium-128k", + "display_name": "ahm-Phi-3-medium-128k", "limit": { "context": 8192, "output": 8192 @@ -233358,15 +232391,15 @@ "supported": false }, "cost": { - "input": 0.00019, - "output": 0.00019 + "input": 6, + "output": 18 }, "type": "chat" }, { - "id": "mai-image-2", - "name": "mai-image-2", - "display_name": "mai-image-2", + "id": "ahm-Phi-3-medium-4k", + "name": "ahm-Phi-3-medium-4k", + "display_name": "ahm-Phi-3-medium-4k", "limit": { "context": 8192, "output": 8192 @@ -233376,16 +232409,15 @@ 
"supported": false }, "cost": { - "input": 2, - "output": 2, - "cache_read": 0 + "input": 1, + "output": 3 }, - "type": "imageGeneration" + "type": "chat" }, { - "id": "meta-llama/Llama-3.2-90B-Vision-Instruct", - "name": "meta-llama/Llama-3.2-90B-Vision-Instruct", - "display_name": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "id": "ahm-Phi-3-small-128k", + "name": "ahm-Phi-3-small-128k", + "display_name": "ahm-Phi-3-small-128k", "limit": { "context": 8192, "output": 8192 @@ -233395,15 +232427,15 @@ "supported": false }, "cost": { - "input": 0.5, - "output": 0.5 + "input": 1, + "output": 3 }, "type": "chat" }, { - "id": "meta-llama/llama-3.1-405b-instruct:free", - "name": "meta-llama/llama-3.1-405b-instruct:free", - "display_name": "meta-llama/llama-3.1-405b-instruct:free", + "id": "aihubmix-Codestral-2501", + "name": "aihubmix-Codestral-2501", + "display_name": "aihubmix-Codestral-2501", "limit": { "context": 8192, "output": 8192 @@ -233413,15 +232445,38 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 0.4, + "output": 1.2 + }, + "type": "chat" + }, + { + "id": "aihubmix-Cohere-command-r", + "name": "aihubmix-Cohere-command-r", + "display_name": "aihubmix-Cohere-command-r", + "modalities": { + "input": [ + "text" + ] + }, + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 0.64, + "output": 1.92 }, "type": "chat" }, { - "id": "meta-llama/llama-3.1-70b-instruct:free", - "name": "meta-llama/llama-3.1-70b-instruct:free", - "display_name": "meta-llama/llama-3.1-70b-instruct:free", + "id": "aihubmix-Jamba-1-5-Large", + "name": "aihubmix-Jamba-1-5-Large", + "display_name": "aihubmix-Jamba-1-5-Large", "limit": { "context": 8192, "output": 8192 @@ -233431,15 +232486,15 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 2.2, + "output": 8.8 }, "type": "chat" }, { - "id": "meta-llama/llama-3.1-8b-instruct:free", - "name": 
"meta-llama/llama-3.1-8b-instruct:free", - "display_name": "meta-llama/llama-3.1-8b-instruct:free", + "id": "aihubmix-Llama-3-1-405B-Instruct", + "name": "aihubmix-Llama-3-1-405B-Instruct", + "display_name": "aihubmix-Llama-3-1-405B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233449,15 +232504,15 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 5, + "output": 15 }, "type": "chat" }, { - "id": "meta-llama/llama-3.2-11b-vision-instruct:free", - "name": "meta-llama/llama-3.2-11b-vision-instruct:free", - "display_name": "meta-llama/llama-3.2-11b-vision-instruct:free", + "id": "aihubmix-Llama-3-1-70B-Instruct", + "name": "aihubmix-Llama-3-1-70B-Instruct", + "display_name": "aihubmix-Llama-3-1-70B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233467,15 +232522,15 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 0.6, + "output": 0.78 }, "type": "chat" }, { - "id": "meta-llama/llama-3.2-3b-instruct:free", - "name": "meta-llama/llama-3.2-3b-instruct:free", - "display_name": "meta-llama/llama-3.2-3b-instruct:free", + "id": "aihubmix-Llama-3-1-8B-Instruct", + "name": "aihubmix-Llama-3-1-8B-Instruct", + "display_name": "aihubmix-Llama-3-1-8B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233485,15 +232540,15 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 0.3, + "output": 0.6 }, "type": "chat" }, { - "id": "meta/llama-3.1-405b-instruct", - "name": "meta/llama-3.1-405b-instruct", - "display_name": "meta/llama-3.1-405b-instruct", + "id": "aihubmix-Llama-3-2-11B-Vision", + "name": "aihubmix-Llama-3-2-11B-Vision", + "display_name": "aihubmix-Llama-3-2-11B-Vision", "limit": { "context": 8192, "output": 8192 @@ -233503,15 +232558,15 @@ "supported": false }, "cost": { - "input": 5, - "output": 5 + "input": 0.4, + "output": 0.4 }, "type": "chat" }, { - "id": "meta/llama3-8B-chat", - "name": "meta/llama3-8B-chat", - "display_name": "meta/llama3-8B-chat", + 
"id": "aihubmix-Llama-3-2-90B-Vision", + "name": "aihubmix-Llama-3-2-90B-Vision", + "display_name": "aihubmix-Llama-3-2-90B-Vision", "limit": { "context": 8192, "output": 8192 @@ -233521,15 +232576,15 @@ "supported": false }, "cost": { - "input": 0.3, - "output": 0.3 + "input": 2.4, + "output": 2.4 }, "type": "chat" }, { - "id": "mistralai/mistral-7b-instruct:free", - "name": "mistralai/mistral-7b-instruct:free", - "display_name": "mistralai/mistral-7b-instruct:free", + "id": "aihubmix-Llama-3-70B-Instruct", + "name": "aihubmix-Llama-3-70B-Instruct", + "display_name": "aihubmix-Llama-3-70B-Instruct", "limit": { "context": 8192, "output": 8192 @@ -233539,15 +232594,15 @@ "supported": false }, "cost": { - "input": 0.002, - "output": 0.002 + "input": 0.7, + "output": 0.7 }, "type": "chat" }, { - "id": "mm-minimax-m3", - "name": "mm-minimax-m3", - "display_name": "mm-minimax-m3", + "id": "aihubmix-Mistral-large", + "name": "aihubmix-Mistral-large", + "display_name": "aihubmix-Mistral-large", "limit": { "context": 8192, "output": 8192 @@ -233557,15 +232612,20 @@ "supported": false }, "cost": { - "input": 0.288, - "output": 1.152 + "input": 4, + "output": 12 }, "type": "chat" }, { - "id": "moonshot-kimi-k2.5", - "name": "moonshot-kimi-k2.5", - "display_name": "moonshot-kimi-k2.5", + "id": "aihubmix-command-r-08-2024", + "name": "aihubmix-command-r-08-2024", + "display_name": "aihubmix-command-r-08-2024", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -233575,16 +232635,20 @@ "supported": false }, "cost": { - "input": 0.6, - "output": 3, - "cache_read": 0.105 + "input": 0.2, + "output": 0.8 }, "type": "chat" }, { - "id": "moonshot-v1-128k", - "name": "moonshot-v1-128k", - "display_name": "moonshot-v1-128k", + "id": "aihubmix-command-r-plus", + "name": "aihubmix-command-r-plus", + "display_name": "aihubmix-command-r-plus", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ 
-233594,15 +232658,20 @@ "supported": false }, "cost": { - "input": 10, - "output": 10 + "input": 3.84, + "output": 19.2 }, "type": "chat" }, { - "id": "moonshot-v1-128k-vision-preview", - "name": "moonshot-v1-128k-vision-preview", - "display_name": "moonshot-v1-128k-vision-preview", + "id": "aihubmix-command-r-plus-08-2024", + "name": "aihubmix-command-r-plus-08-2024", + "display_name": "aihubmix-command-r-plus-08-2024", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -233612,15 +232681,15 @@ "supported": false }, "cost": { - "input": 10, - "output": 10 + "input": 2.8, + "output": 11.2 }, "type": "chat" }, { - "id": "moonshot-v1-32k", - "name": "moonshot-v1-32k", - "display_name": "moonshot-v1-32k", + "id": "alicloud-deepseek-v3.2", + "name": "alicloud-deepseek-v3.2", + "display_name": "alicloud-deepseek-v3.2", "limit": { "context": 8192, "output": 8192 @@ -233630,15 +232699,16 @@ "supported": false }, "cost": { - "input": 4, - "output": 4 + "input": 0.274, + "output": 0.411, + "cache_read": 0.0548 }, "type": "chat" }, { - "id": "moonshot-v1-32k-vision-preview", - "name": "moonshot-v1-32k-vision-preview", - "display_name": "moonshot-v1-32k-vision-preview", + "id": "alicloud-glm-4.7", + "name": "alicloud-glm-4.7", + "display_name": "alicloud-glm-4.7", "limit": { "context": 8192, "output": 8192 @@ -233648,15 +232718,16 @@ "supported": false }, "cost": { - "input": 4, - "output": 4 + "input": 0.41096, + "output": 1.917786, + "cache_read": 0.41096 }, "type": "chat" }, { - "id": "moonshot-v1-8k", - "name": "moonshot-v1-8k", - "display_name": "moonshot-v1-8k", + "id": "alicloud-kimi-k2-thinking", + "name": "alicloud-kimi-k2-thinking", + "display_name": "alicloud-kimi-k2-thinking", "limit": { "context": 8192, "output": 8192 @@ -233666,33 +232737,34 @@ "supported": false }, "cost": { - "input": 2, - "output": 2 + "input": 0.548, + "output": 2.192 }, "type": "chat" }, { - "id": "moonshot-v1-8k-vision-preview", - "name": 
"moonshot-v1-8k-vision-preview", - "display_name": "moonshot-v1-8k-vision-preview", + "id": "alicloud-kimi-k2.5", + "name": "alicloud-kimi-k2.5", + "display_name": "alicloud-kimi-k2.5", "limit": { - "context": 8192, - "output": 8192 + "context": 256000, + "output": 256000 }, "tool_call": false, "reasoning": { "supported": false }, "cost": { - "input": 2, - "output": 2 + "input": 0.548, + "output": 2.877, + "cache_read": 0.0959 }, "type": "chat" }, { - "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", - "name": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", - "display_name": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "id": "alicloud-minimax-m2.5", + "name": "alicloud-minimax-m2.5", + "display_name": "alicloud-minimax-m2.5", "limit": { "context": 8192, "output": 8192 @@ -233702,50 +232774,47 @@ "supported": false }, "cost": { - "input": 0.5, - "output": 0.5, - "cache_read": 0 + "input": 0.2876, + "output": 1.1504, + "cache_read": 0.05752 }, "type": "chat" }, { - "id": "o1-mini-2024-09-12", - "name": "o1-mini-2024-09-12", - "display_name": "o1-mini-2024-09-12", + "id": "anthropic-opus-4-6", + "name": "anthropic-opus-4-6", + "display_name": "anthropic-opus-4-6", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { - "context": 8192, - "output": 8192 + "context": 200000, + "output": 200000 }, - "tool_call": false, + "tool_call": true, "reasoning": { "supported": true, "default": true }, "extra_capabilities": { "reasoning": { - "supported": true, - "default_enabled": true, - "mode": "effort", - "effort": "medium", - "effort_options": [ - "low", - "medium", - "high" - ], - "visibility": "hidden" + "supported": true } }, "cost": { - "input": 3, - "output": 12, - "cache_read": 1.5 + "input": 5, + "output": 25, + "cache_read": 0.5 }, "type": "chat" }, { - "id": "omni-moderation-latest", - "name": "omni-moderation-latest", - "display_name": "omni-moderation-latest", + "id": "azure-deepseek-v3.2", + "name": "azure-deepseek-v3.2", + "display_name": 
"azure-deepseek-v3.2", "limit": { "context": 8192, "output": 8192 @@ -233755,75 +232824,51 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 0.58, + "output": 1.680028 }, "type": "chat" }, { - "id": "qwen-flash", - "name": "qwen-flash", - "display_name": "qwen-flash", + "id": "azure-deepseek-v3.2-speciale", + "name": "azure-deepseek-v3.2-speciale", + "display_name": "azure-deepseek-v3.2-speciale", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, "cost": { - "input": 0.02, - "output": 0.2, - "cache_read": 0.02 + "input": 0.58, + "output": 1.680028 }, "type": "chat" }, { - "id": "qwen-flash-2025-07-28", - "name": "qwen-flash-2025-07-28", - "display_name": "qwen-flash-2025-07-28", + "id": "azure-kimi-k2.5", + "name": "azure-kimi-k2.5", + "display_name": "azure-kimi-k2.5", "limit": { - "context": 8192, - "output": 8192 + "context": 256000, + "output": 256000 }, "tool_call": false, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, "cost": { - "input": 0.02, - "output": 0.2, - "cache_read": 0.02 + "input": 0.6, + "output": 3 }, "type": "chat" }, { - "id": "qwen-long", - "name": "qwen-long", - "display_name": "qwen-long", + "id": "cbs-glm-4.7", + "name": "cbs-glm-4.7", + "display_name": "cbs-glm-4.7", "limit": { "context": 8192, "output": 8192 @@ -233833,15 +232878,15 @@ "supported": false }, "cost": { - "input": 0.1, - "output": 0.4 + "input": 2.25, + "output": 2.749995 }, "type": "chat" }, { - "id": "qwen-max", - "name": "qwen-max", - "display_name": "qwen-max", + "id": 
"cerebras-llama-3.3-70b", + "name": "cerebras-llama-3.3-70b", + "display_name": "cerebras-llama-3.3-70b", "limit": { "context": 8192, "output": 8192 @@ -233851,15 +232896,15 @@ "supported": false }, "cost": { - "input": 0.38, - "output": 1.52 + "input": 0.6, + "output": 0.6 }, "type": "chat" }, { - "id": "qwen-max-longcontext", - "name": "qwen-max-longcontext", - "display_name": "qwen-max-longcontext", + "id": "chatglm_lite", + "name": "chatglm_lite", + "display_name": "chatglm_lite", "limit": { "context": 8192, "output": 8192 @@ -233869,85 +232914,51 @@ "supported": false }, "cost": { - "input": 7, - "output": 21 + "input": 0.2858, + "output": 0.2858 }, "type": "chat" }, { - "id": "qwen-plus", - "name": "qwen-plus", - "display_name": "qwen-plus", + "id": "chatglm_pro", + "name": "chatglm_pro", + "display_name": "chatglm_pro", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, "cost": { - "input": 0.1126, - "output": 1.126, - "cache_read": 0.02252 + "input": 1.4286, + "output": 1.4286 }, "type": "chat" }, { - "id": "qwen-turbo", - "name": "qwen-turbo", - "display_name": "qwen-turbo", - "modalities": { - "input": [ - "text" - ] - }, + "id": "chatglm_std", + "name": "chatglm_std", + "display_name": "chatglm_std", "limit": { "context": 8192, "output": 8192 }, "tool_call": false, "reasoning": { - "supported": true - }, - "extra_capabilities": { - "reasoning": { - "supported": true, - "interleaved": true, - "summaries": true, - "visibility": "summary", - "continuation": [ - "thinking_blocks" - ] - } + "supported": false }, "cost": { - "input": 0.046, - "output": 0.092, - "cache_read": 0.0092 + "input": 0.7144, + "output": 0.7144 }, "type": "chat" }, { - "id": "qwen-turbo-2024-11-01", - "name": 
"qwen-turbo-2024-11-01", - "display_name": "qwen-turbo-2024-11-01", - "modalities": { - "input": [ - "text" - ] - }, + "id": "chatglm_turbo", + "name": "chatglm_turbo", + "display_name": "chatglm_turbo", "limit": { "context": 8192, "output": 8192 @@ -233957,15 +232968,15 @@ "supported": false }, "cost": { - "input": 0.046, - "output": 0.092 + "input": 0.7144, + "output": 0.7144 }, "type": "chat" }, { - "id": "qwen2.5-14b-instruct", - "name": "qwen2.5-14b-instruct", - "display_name": "qwen2.5-14b-instruct", + "id": "claude-2", + "name": "claude-2", + "display_name": "claude-2", "limit": { "context": 8192, "output": 8192 @@ -233975,15 +232986,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 1.2 + "input": 8.8, + "output": 8.8 }, "type": "chat" }, { - "id": "qwen2.5-32b-instruct", - "name": "qwen2.5-32b-instruct", - "display_name": "qwen2.5-32b-instruct", + "id": "claude-2.0", + "name": "claude-2.0", + "display_name": "claude-2.0", "limit": { "context": 8192, "output": 8192 @@ -233993,15 +233004,15 @@ "supported": false }, "cost": { - "input": 0.6, - "output": 1.2 + "input": 8.8, + "output": 39.6 }, "type": "chat" }, { - "id": "qwen2.5-3b-instruct", - "name": "qwen2.5-3b-instruct", - "display_name": "qwen2.5-3b-instruct", + "id": "claude-2.1", + "name": "claude-2.1", + "display_name": "claude-2.1", "limit": { "context": 8192, "output": 8192 @@ -234011,15 +233022,21 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 0.8 + "input": 8.8, + "output": 39.6 }, "type": "chat" }, { - "id": "qwen2.5-72b-instruct", - "name": "qwen2.5-72b-instruct", - "display_name": "qwen2.5-72b-instruct", + "id": "claude-3-haiku-20240229", + "name": "claude-3-haiku-20240229", + "display_name": "claude-3-haiku-20240229", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -234029,15 +233046,21 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 2.4 + "input": 0.275, + "output": 0.275 }, "type": "chat" 
}, { - "id": "qwen2.5-7b-instruct", - "name": "qwen2.5-7b-instruct", - "display_name": "qwen2.5-7b-instruct", + "id": "claude-3-haiku-20240307", + "name": "claude-3-haiku-20240307", + "display_name": "claude-3-haiku-20240307", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -234047,15 +233070,21 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 0.8 + "input": 0.275, + "output": 1.375 }, "type": "chat" }, { - "id": "qwen2.5-coder-1.5b-instruct", - "name": "qwen2.5-coder-1.5b-instruct", - "display_name": "qwen2.5-coder-1.5b-instruct", + "id": "claude-3-sonnet-20240229", + "name": "claude-3-sonnet-20240229", + "display_name": "claude-3-sonnet-20240229", + "modalities": { + "input": [ + "text", + "image" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -234065,15 +233094,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.4 + "input": 3.3, + "output": 16.5 }, "type": "chat" }, { - "id": "qwen2.5-coder-7b-instruct", - "name": "qwen2.5-coder-7b-instruct", - "display_name": "qwen2.5-coder-7b-instruct", + "id": "claude-instant-1", + "name": "claude-instant-1", + "display_name": "claude-instant-1", "limit": { "context": 8192, "output": 8192 @@ -234083,15 +233112,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.4 + "input": 1.793, + "output": 1.793 }, "type": "chat" }, { - "id": "qwen2.5-math-1.5b-instruct", - "name": "qwen2.5-math-1.5b-instruct", - "display_name": "qwen2.5-math-1.5b-instruct", + "id": "claude-instant-1.2", + "name": "claude-instant-1.2", + "display_name": "claude-instant-1.2", "limit": { "context": 8192, "output": 8192 @@ -234101,15 +233130,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.88, + "output": 3.96 }, "type": "chat" }, { - "id": "qwen2.5-math-72b-instruct", - "name": "qwen2.5-math-72b-instruct", - "display_name": "qwen2.5-math-72b-instruct", + "id": "code-davinci-edit-001", + "name": 
"code-davinci-edit-001", + "display_name": "code-davinci-edit-001", "limit": { "context": 8192, "output": 8192 @@ -234119,15 +233148,15 @@ "supported": false }, "cost": { - "input": 0.8, - "output": 2.4 + "input": 20, + "output": 20 }, "type": "chat" }, { - "id": "qwen2.5-math-7b-instruct", - "name": "qwen2.5-math-7b-instruct", - "display_name": "qwen2.5-math-7b-instruct", + "id": "cogview-3", + "name": "cogview-3", + "display_name": "cogview-3", "limit": { "context": 8192, "output": 8192 @@ -234137,15 +233166,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.4 + "input": 35.5, + "output": 35.5 }, "type": "chat" }, { - "id": "step-2-16k", - "name": "step-2-16k", - "display_name": "step-2-16k", + "id": "cogview-3-plus", + "name": "cogview-3-plus", + "display_name": "cogview-3-plus", "limit": { "context": 8192, "output": 8192 @@ -234155,15 +233184,20 @@ "supported": false }, "cost": { - "input": 2, - "output": 2 + "input": 10, + "output": 10 }, "type": "chat" }, { - "id": "text-ada-001", - "name": "text-ada-001", - "display_name": "text-ada-001", + "id": "command", + "name": "command", + "display_name": "command", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -234173,15 +233207,15 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 0.4 + "input": 1, + "output": 2 }, "type": "chat" }, { - "id": "text-babbage-001", - "name": "text-babbage-001", - "display_name": "text-babbage-001", + "id": "command-light", + "name": "command-light", + "display_name": "command-light", "limit": { "context": 8192, "output": 8192 @@ -234191,15 +233225,15 @@ "supported": false }, "cost": { - "input": 0.5, - "output": 0.5 + "input": 1, + "output": 2 }, "type": "chat" }, { - "id": "text-curie-001", - "name": "text-curie-001", - "display_name": "text-curie-001", + "id": "command-light-nightly", + "name": "command-light-nightly", + "display_name": "command-light-nightly", "limit": { "context": 8192, "output": 8192 @@ 
-234209,15 +233243,15 @@ "supported": false }, "cost": { - "input": 2, + "input": 1, "output": 2 }, "type": "chat" }, { - "id": "text-davinci-002", - "name": "text-davinci-002", - "display_name": "text-davinci-002", + "id": "command-nightly", + "name": "command-nightly", + "display_name": "command-nightly", "limit": { "context": 8192, "output": 8192 @@ -234227,15 +233261,20 @@ "supported": false }, "cost": { - "input": 20, - "output": 20 + "input": 1, + "output": 2 }, "type": "chat" }, { - "id": "text-davinci-003", - "name": "text-davinci-003", - "display_name": "text-davinci-003", + "id": "command-r", + "name": "command-r", + "display_name": "command-r", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -234245,15 +233284,20 @@ "supported": false }, "cost": { - "input": 20, - "output": 20 + "input": 0.64, + "output": 1.92 }, "type": "chat" }, { - "id": "text-davinci-edit-001", - "name": "text-davinci-edit-001", - "display_name": "text-davinci-edit-001", + "id": "command-r-08-2024", + "name": "command-r-08-2024", + "display_name": "command-r-08-2024", + "modalities": { + "input": [ + "text" + ] + }, "limit": { "context": 8192, "output": 8192 @@ -234263,15 +233307,15 @@ "supported": false }, "cost": { - "input": 20, - "output": 20 + "input": 0.2, + "output": 0.8 }, "type": "chat" }, { - "id": "text-embedding-3-large", - "name": "text-embedding-3-large", - "display_name": "text-embedding-3-large", + "id": "command-r-plus", + "name": "command-r-plus", + "display_name": "command-r-plus", "modalities": { "input": [ "text" @@ -234286,15 +233330,15 @@ "supported": false }, "cost": { - "input": 0.13, - "output": 0.13 + "input": 3.84, + "output": 19.2 }, - "type": "embedding" + "type": "chat" }, { - "id": "text-embedding-3-small", - "name": "text-embedding-3-small", - "display_name": "text-embedding-3-small", + "id": "command-r-plus-08-2024", + "name": "command-r-plus-08-2024", + "display_name": "command-r-plus-08-2024", 
"modalities": { "input": [ "text" @@ -234309,18 +233353,19 @@ "supported": false }, "cost": { - "input": 0.02, - "output": 0.02 + "input": 2.8, + "output": 11.2 }, - "type": "embedding" + "type": "chat" }, { - "id": "text-embedding-ada-002", - "name": "text-embedding-ada-002", - "display_name": "text-embedding-ada-002", + "id": "dall-e-2", + "name": "dall-e-2", + "display_name": "dall-e-2", "modalities": { "input": [ - "text" + "text", + "image" ] }, "limit": { @@ -234332,20 +233377,15 @@ "supported": false }, "cost": { - "input": 0.1, - "output": 0.1 + "input": 16, + "output": 16 }, - "type": "embedding" + "type": "imageGeneration" }, { - "id": "text-embedding-v1", - "name": "text-embedding-v1", - "display_name": "text-embedding-v1", - "modalities": { - "input": [ - "text" - ] - }, + "id": "davinci", + "name": "davinci", + "display_name": "davinci", "limit": { "context": 8192, "output": 8192 @@ -234355,20 +233395,15 @@ "supported": false }, "cost": { - "input": 0.1, - "output": 0.1 + "input": 20, + "output": 20 }, - "type": "embedding" + "type": "chat" }, { - "id": "tts-1-hd-1106", - "name": "tts-1-hd-1106", - "display_name": "tts-1-hd-1106", - "modalities": { - "input": [ - "audio" - ] - }, + "id": "davinci-002", + "name": "davinci-002", + "display_name": "davinci-002", "limit": { "context": 8192, "output": 8192 @@ -234378,19 +233413,15 @@ "supported": false }, "cost": { - "input": 30, - "output": 30 - } + "input": 2, + "output": 2 + }, + "type": "chat" }, { - "id": "tts-1-hd", - "name": "tts-1-hd", - "display_name": "tts-1-hd", - "modalities": { - "input": [ - "audio" - ] - }, + "id": "deepinfra-llama-3.1-8b-instant", + "name": "deepinfra-llama-3.1-8b-instant", + "display_name": "deepinfra-llama-3.1-8b-instant", "limit": { "context": 8192, "output": 8192 @@ -234400,19 +233431,15 @@ "supported": false }, "cost": { - "input": 30, - "output": 30 - } + "input": 0.033, + "output": 0.054978 + }, + "type": "chat" }, { - "id": "tts-1-1106", - "name": "tts-1-1106", - 
"display_name": "tts-1-1106", - "modalities": { - "input": [ - "audio" - ] - }, + "id": "deepinfra-llama-3.3-70b-instant-turbo", + "name": "deepinfra-llama-3.3-70b-instant-turbo", + "display_name": "deepinfra-llama-3.3-70b-instant-turbo", "limit": { "context": 8192, "output": 8192 @@ -234422,19 +233449,15 @@ "supported": false }, "cost": { - "input": 15, - "output": 15 - } + "input": 0.11, + "output": 0.352 + }, + "type": "chat" }, { - "id": "whisper-1", - "name": "whisper-1", - "display_name": "whisper-1", - "modalities": { - "input": [ - "audio" - ] - }, + "id": "deepinfra-llama-4-maverick-17b-128e-instruct", + "name": "deepinfra-llama-4-maverick-17b-128e-instruct", + "display_name": "deepinfra-llama-4-maverick-17b-128e-instruct", "limit": { "context": 8192, "output": 8192 @@ -234444,20 +233467,34 @@ "supported": false }, "cost": { - "input": 100, - "output": 100 + "input": 1.65, + "output": 6.6 }, "type": "chat" }, { - "id": "whisper-large-v3", - "name": "whisper-large-v3", - "display_name": "whisper-large-v3", - "modalities": { - "input": [ - "audio" - ] + "id": "deepinfra-llama-4-scout-17b-16e-instruct", + "name": "deepinfra-llama-4-scout-17b-16e-instruct", + "display_name": "deepinfra-llama-4-scout-17b-16e-instruct", + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": false + }, + "cost": { + "input": 0.088, + "output": 0.33, + "cache_read": 0 }, + "type": "chat" + }, + { + "id": "deepseek-ai/DeepSeek-Coder-V2-Instruct", + "name": "deepseek-ai/DeepSeek-Coder-V2-Instruct", + "display_name": "deepseek-ai/DeepSeek-Coder-V2-Instruct", "limit": { "context": 8192, "output": 8192 @@ -234467,20 +233504,38 @@ "supported": false }, "cost": { - "input": 30.834, - "output": 30.834 + "input": 0.16, + "output": 0.32 }, "type": "chat" }, { - "id": "whisper-large-v3-turbo", - "name": "whisper-large-v3-turbo", - "display_name": "whisper-large-v3-turbo", - "modalities": { - "input": [ - "audio" - ] + "id": 
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "limit": { + "context": 8192, + "output": 8192 + }, + "tool_call": false, + "reasoning": { + "supported": true + }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, + "cost": { + "input": 0.6, + "output": 0.6 }, + "type": "chat" + }, + { + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "limit": { "context": 8192, "output": 8192 @@ -234490,20 +233545,15 @@ "supported": false }, "cost": { - "input": 5.556, - "output": 5.556 + "input": 0.01, + "output": 0.01 }, "type": "chat" }, { - "id": "tts-1", - "name": "tts-1", - "display_name": "tts-1", - "modalities": { - "input": [ - "audio" - ] - }, + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "limit": { "context": 8192, "output": 8192 @@ -234513,14 +233563,15 @@ "supported": false }, "cost": { - "input": 15, - "output": 15 - } + "input": 0.01, + "output": 0.01 + }, + "type": "chat" }, { - "id": "text-search-ada-doc-001", - "name": "text-search-ada-doc-001", - "display_name": "text-search-ada-doc-001", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "limit": { "context": 8192, "output": 8192 @@ -234530,15 +233581,15 @@ "supported": false }, "cost": { - "input": 20, - "output": 20 + "input": 0.1, + "output": 0.1 }, "type": "chat" }, { - "id": "text-moderation-stable", - "name": "text-moderation-stable", - "display_name": "text-moderation-stable", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "display_name": 
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "limit": { "context": 8192, "output": 8192 @@ -234554,9 +233605,9 @@ "type": "chat" }, { - "id": "text-moderation-latest", - "name": "text-moderation-latest", - "display_name": "text-moderation-latest", + "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "display_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "limit": { "context": 8192, "output": 8192 @@ -234566,15 +233617,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.01, + "output": 0.01 }, "type": "chat" }, { - "id": "text-moderation-007", - "name": "text-moderation-007", - "display_name": "text-moderation-007", + "id": "deepseek-ai/DeepSeek-V2-Chat", + "name": "deepseek-ai/DeepSeek-V2-Chat", + "display_name": "deepseek-ai/DeepSeek-V2-Chat", "limit": { "context": 8192, "output": 8192 @@ -234584,15 +233635,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.16, + "output": 0.32 }, "type": "chat" }, { - "id": "yi-large", - "name": "yi-large", - "display_name": "yi-large", + "id": "deepseek-ai/DeepSeek-V2.5", + "name": "deepseek-ai/DeepSeek-V2.5", + "display_name": "deepseek-ai/DeepSeek-V2.5", "limit": { "context": 8192, "output": 8192 @@ -234602,15 +233653,15 @@ "supported": false }, "cost": { - "input": 3, - "output": 3 + "input": 0.16, + "output": 0.32 }, "type": "chat" }, { - "id": "yi-large-rag", - "name": "yi-large-rag", - "display_name": "yi-large-rag", + "id": "deepseek-r1-distill-qianfan-llama-8b", + "name": "deepseek-r1-distill-qianfan-llama-8b", + "display_name": "deepseek-r1-distill-qianfan-llama-8b", "limit": { "context": 8192, "output": 8192 @@ -234620,15 +233671,15 @@ "supported": false }, "cost": { - "input": 4, - "output": 4 + "input": 0.137, + "output": 0.548 }, "type": "chat" }, { - "id": "yi-large-turbo", - "name": "yi-large-turbo", - "display_name": "yi-large-turbo", + "id": "doubao-1-5-pro-256k-250115", + "name": 
"doubao-1-5-pro-256k-250115", + "display_name": "doubao-1-5-pro-256k-250115", "limit": { "context": 8192, "output": 8192 @@ -234638,15 +233689,15 @@ "supported": false }, "cost": { - "input": 1.8, - "output": 1.8 + "input": 0.684, + "output": 1.2312 }, "type": "chat" }, { - "id": "yi-lightning", - "name": "yi-lightning", - "display_name": "yi-lightning", + "id": "doubao-1-5-pro-32k-250115", + "name": "doubao-1-5-pro-32k-250115", + "display_name": "doubao-1-5-pro-32k-250115", "limit": { "context": 8192, "output": 8192 @@ -234656,15 +233707,15 @@ "supported": false }, "cost": { - "input": 0.2, - "output": 0.2 + "input": 0.108, + "output": 0.27 }, "type": "chat" }, { - "id": "yi-medium", - "name": "yi-medium", - "display_name": "yi-medium", + "id": "gpt-4o-2024-08-06-global", + "name": "gpt-4o-2024-08-06-global", + "display_name": "gpt-4o-2024-08-06-global", "limit": { "context": 8192, "output": 8192 @@ -234674,15 +233725,16 @@ "supported": false }, "cost": { - "input": 0.4, - "output": 0.4 + "input": 2.5, + "output": 10, + "cache_read": 1.25 }, "type": "chat" }, { - "id": "yi-vl-plus", - "name": "yi-vl-plus", - "display_name": "yi-vl-plus", + "id": "gpt-4o-mini-global", + "name": "gpt-4o-mini-global", + "display_name": "gpt-4o-mini-global", "limit": { "context": 8192, "output": 8192 @@ -234692,8 +233744,9 @@ "supported": false }, "cost": { - "input": 0.000852, - "output": 0.000852 + "input": 0.15, + "output": 0.6, + "cache_read": 0.075 }, "type": "chat" }, @@ -234869,98 +233922,6 @@ "output": 0.685 }, "type": "chat" - }, - { - "id": "deepseek-r1-distill-qianfan-llama-8b", - "name": "deepseek-r1-distill-qianfan-llama-8b", - "display_name": "deepseek-r1-distill-qianfan-llama-8b", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 0.137, - "output": 0.548 - }, - "type": "chat" - }, - { - "id": "doubao-1-5-pro-256k-250115", - "name": "doubao-1-5-pro-256k-250115", - "display_name": 
"doubao-1-5-pro-256k-250115", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 0.684, - "output": 1.2312 - }, - "type": "chat" - }, - { - "id": "doubao-1-5-pro-32k-250115", - "name": "doubao-1-5-pro-32k-250115", - "display_name": "doubao-1-5-pro-32k-250115", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 0.108, - "output": 0.27 - }, - "type": "chat" - }, - { - "id": "gpt-4o-2024-08-06-global", - "name": "gpt-4o-2024-08-06-global", - "display_name": "gpt-4o-2024-08-06-global", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 2.5, - "output": 10, - "cache_read": 1.25 - }, - "type": "chat" - }, - { - "id": "gpt-4o-mini-global", - "name": "gpt-4o-mini-global", - "display_name": "gpt-4o-mini-global", - "limit": { - "context": 8192, - "output": 8192 - }, - "tool_call": false, - "reasoning": { - "supported": false - }, - "cost": { - "input": 0.15, - "output": 0.6, - "cache_read": 0.075 - }, - "type": "chat" } ] }, @@ -236612,8 +235573,8 @@ ] }, "limit": { - "context": 1048576, - "output": 1048576 + "context": 1000000, + "output": 65536 }, "tool_call": true, "reasoning": { @@ -238962,7 +237923,7 @@ }, "limit": { "context": 131072, - "output": 32768 + "output": 100352 }, "tool_call": true, "reasoning": { @@ -238984,7 +237945,7 @@ }, "limit": { "context": 262144, - "output": 262144 + "output": 100352 }, "tool_call": true, "reasoning": { @@ -239411,7 +238372,7 @@ }, "limit": { "context": 262144, - "output": 262144 + "output": 16384 }, "temperature": true, "tool_call": true, @@ -241686,6 +240647,11 @@ "supported": true, "default": true }, + "extra_capabilities": { + "reasoning": { + "supported": true + } + }, "attachment": true, "type": "imageGeneration" }, @@ -244067,6 +243033,7 @@ "supported": true 
} }, + "attachment": true, "type": "imageGeneration" }, { @@ -248135,7 +247102,7 @@ }, "limit": { "context": 262144, - "output": 262144 + "output": 100352 }, "tool_call": true, "reasoning": { @@ -248157,7 +247124,7 @@ }, "limit": { "context": 131072, - "output": 131072 + "output": 100352 }, "tool_call": true, "reasoning": { @@ -249607,7 +248574,7 @@ "attachment": true, "open_weights": false, "knowledge": "2024-07-31", - "release_date": "2024-10-22", + "release_date": "2024-11-04", "last_updated": "2024-10-22", "cost": { "input": 0.8, @@ -249908,7 +248875,7 @@ "attachment": true, "open_weights": false, "knowledge": "2025-03-31", - "release_date": "2024-11-24", + "release_date": "2025-11-24", "last_updated": "2025-11-24", "cost": { "input": 5, @@ -250789,9 +249756,7 @@ "display_name": "DeepSeek: DeepSeek V3.2", "modalities": { "input": [ - "text", - "image", - "pdf" + "text" ], "output": [ "text" @@ -250804,8 +249769,7 @@ "temperature": true, "tool_call": false, "reasoning": { - "supported": true, - "default": true + "supported": true }, "extra_capabilities": { "reasoning": { @@ -250830,9 +249794,7 @@ "display_name": "DeepSeek: DeepSeek V4 Flash", "modalities": { "input": [ - "text", - "image", - "pdf" + "text" ], "output": [ "text" @@ -250877,8 +249839,7 @@ "display_name": "DeepSeek: DeepSeek V4 Pro", "modalities": { "input": [ - "text", - "pdf" + "text" ], "output": [ "text" @@ -252057,8 +251018,7 @@ "modalities": { "input": [ "text", - "image", - "pdf" + "image" ], "output": [ "text" @@ -253115,8 +252075,7 @@ "temperature": true, "tool_call": true, "reasoning": { - "supported": true, - "default": true + "supported": false }, "attachment": true, "open_weights": false, @@ -254684,7 +253643,6 @@ "modalities": { "input": [ "text", - "image", "pdf" ], "output": [ diff --git a/scripts/afterPack.js b/scripts/afterPack.js index 279043ac7..f4a308369 100644 --- a/scripts/afterPack.js +++ b/scripts/afterPack.js @@ -1,7 +1,11 @@ import fs from 'node:fs/promises' import path 
from 'node:path' +import { gzip } from 'node:zlib' +import { promisify } from 'node:util' const LINUX_APP_NAME = 'deepchat' +const VSS_EXTENSION_NAME = 'vss.duckdb_extension' +const gzipAsync = promisify(gzip) const ARCH_NAMES = new Map([ [0, 'ia32'], [1, 'x64'], @@ -185,11 +189,38 @@ async function afterPackLinux({ appOutDir }) { await fs.chmod(scriptPath, 0o755) } +async function encodeMacVssExtension(context) { + if (context.electronPlatformName !== 'darwin') { + return + } + + const extensionPath = path.join( + getResourcesDir(context), + 'app.asar.unpacked', + 'runtime', + 'duckdb', + 'extensions', + VSS_EXTENSION_NAME + ) + + if (!(await pathExists(extensionPath))) { + return + } + + const base64Path = `${extensionPath}.b64` + const extension = await fs.readFile(extensionPath) + const compressed = await gzipAsync(extension) + await fs.writeFile(base64Path, compressed.toString('base64'), 'utf8') + await fs.rm(extensionPath, { force: true }) + console.info(`[afterPack] encoded macOS DuckDB VSS extension: ${base64Path}`) +} + async function afterPack(context) { const { targets, appOutDir } = context await copyFffNativePackages(context) await copyParcelWatcherNativePackages(context) + await encodeMacVssExtension(context) if (isLinux(targets)) { await afterPackLinux({ appOutDir }) diff --git a/scripts/installVss.js b/scripts/installVss.js index 9c0e874e1..e54cba350 100644 --- a/scripts/installVss.js +++ b/scripts/installVss.js @@ -1,52 +1,240 @@ -// install duckdb extension import fs from 'node:fs' import path from 'node:path' -import { fileURLToPath } from 'node:url' +import { createRequire } from 'node:module' +import { fileURLToPath, pathToFileURL } from 'node:url' +import zlib from 'node:zlib' -function isMacOS() { - return process.platform === 'darwin' +const require = createRequire(import.meta.url) +const duckdbPackage = require('@duckdb/node-api/package.json') + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) 
+export const extensionName = 'vss.duckdb_extension' +export const defaultRepository = 'https://extensions.duckdb.org' +export const defaultDownloadRetries = 3 +export const defaultRetryBaseDelayMs = 250 +export const defaultRequestTimeoutMs = 15_000 +const extensionMetadataFooterBytes = 64 * 1024 + +export class VssDownloadError extends Error { + constructor(message, options = {}) { + super(message) + this.name = 'VssDownloadError' + this.status = options.status + this.retryable = options.retryable === true + this.cause = options.cause + } +} + +export function parseArgs(argv) { + const options = {} + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index] + if (arg === '--') continue + if (!arg.startsWith('--')) continue + const [rawKey, inlineValue] = arg.slice(2).split('=', 2) + let value = inlineValue + if (value === undefined) { + const next = argv[index + 1] + if (next === undefined || next === '--' || next.startsWith('--')) { + throw new Error(`Missing value for --${rawKey}`) + } + value = next + index += 1 + } + options[rawKey] = value + } + return options +} + +export function normalizePlatform(value) { + switch (value) { + case 'darwin': + case 'mac': + case 'macos': + case 'osx': + return 'darwin' + case 'win32': + case 'windows': + case 'win': + return 'win32' + case 'linux': + return 'linux' + default: + throw new Error(`Unsupported DuckDB VSS platform: ${value}`) + } } -async function installVssExtension() { - if (isMacOS()) { - console.log('Skipping DuckDB extension installation on macOS') - return +export function normalizeArch(value) { + switch (value) { + case 'x64': + case 'amd64': + return 'x64' + case 'arm64': + case 'aarch64': + return 'arm64' + default: + throw new Error(`Unsupported DuckDB VSS architecture: ${value}`) } - const __filename = fileURLToPath(import.meta.url) - const __dirname = path.dirname(__filename) +} + +export function resolveDuckDbVersion(packageVersion) { + const base = 
packageVersion.split('-')[0] + if (!/^\d+\.\d+\.\d+$/.test(base)) { + throw new Error(`Cannot derive DuckDB extension version from @duckdb/node-api ${packageVersion}`) + } + return `v${base}` +} + +export function targetTriple(platform, arch) { + if (platform === 'darwin') return arch === 'arm64' ? 'osx_arm64' : 'osx_amd64' + if (platform === 'win32') return arch === 'arm64' ? 'windows_arm64' : 'windows_amd64' + if (platform === 'linux') return arch === 'arm64' ? 'linux_arm64' : 'linux_amd64' + throw new Error(`Unsupported DuckDB VSS target: ${platform}/${arch}`) +} + +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)) +} +async function fetchWithTimeout(fetchImpl, url, timeoutMs) { + const controller = new AbortController() + let timer + const timeout = new Promise((_, reject) => { + timer = setTimeout(() => { + controller.abort() + reject(new Error(`Timed out after ${timeoutMs}ms while downloading ${url}`)) + }, timeoutMs) + if (typeof timer.unref === 'function') timer.unref() + }) try { - const duckdb = await import('@duckdb/node-api') - const inst = await duckdb.DuckDBInstance.create(':memory:') - const conn = await inst.connect() + return await Promise.race([fetchImpl(url, { signal: controller.signal }), timeout]) + } finally { + clearTimeout(timer) + } +} - await conn.run('INSTALL vss') - const reader = await conn.runAndReadAll( - 'SELECT install_path FROM duckdb_extensions() WHERE extension_name = \'vss\'' +function isRetryableStatus(status) { + return status === 408 || status === 429 || status >= 500 +} + +function downloadErrorMessage(reason, context) { + const details = context + ? `DuckDB ${context.duckdbVersion}, target ${context.triple}, source ${context.url}` + : `source ${context?.url ?? 
'unknown'}` + return `${reason} while downloading DuckDB VSS extension (${details})` +} + +export function validateExtensionMetadata(extension, expected) { + if (!Buffer.isBuffer(extension) || extension.length === 0) { + throw new Error('DuckDB VSS extension is empty or invalid') + } + const footer = extension + .subarray(Math.max(0, extension.length - extensionMetadataFooterBytes)) + .toString('latin1') + const missing = [] + if (!footer.includes('duckdb_signature')) missing.push('duckdb_signature') + if (!footer.includes(expected.duckdbVersion)) missing.push(expected.duckdbVersion) + if (!footer.includes(expected.triple)) missing.push(expected.triple) + if (missing.length > 0) { + throw new Error( + `DuckDB VSS extension metadata mismatch for ${expected.duckdbVersion}/${expected.triple}; missing ${missing.join(', ')}` ) - const rows = reader.getRows() - if (rows.length === 0) { - throw new Error('VSS extension not found after installation') - } - const sourcePath = rows[0][0] - if (!sourcePath || typeof sourcePath !== 'string') { - throw new Error('Invalid extension path returned from DuckDB') - } - console.log('vss extension path:', sourcePath) + } +} + +export async function downloadExtension(url, options = {}) { + const fetchImpl = options.fetchImpl ?? fetch + const sleepImpl = options.sleep ?? sleep + const retries = options.retries ?? defaultDownloadRetries + const baseDelayMs = options.baseDelayMs ?? defaultRetryBaseDelayMs + const timeoutMs = options.timeoutMs ?? defaultRequestTimeoutMs + const onRetry = options.onRetry ?? 
(() => undefined) + let lastError = null - const targetDir = path.join(__dirname, '../runtime/duckdb/extensions') - if (!fs.existsSync(targetDir)) { - fs.mkdirSync(targetDir, { recursive: true }) + for (let attempt = 0; attempt <= retries; attempt += 1) { + try { + const response = await fetchWithTimeout(fetchImpl, url, timeoutMs) + if (!response.ok) { + const retryable = isRetryableStatus(response.status) + throw new VssDownloadError( + downloadErrorMessage(`HTTP ${response.status}`, options.context), + { + status: response.status, + retryable + } + ) + } + return Buffer.from(await response.arrayBuffer()) + } catch (error) { + const downloadError = + error instanceof VssDownloadError + ? error + : new VssDownloadError(downloadErrorMessage(String(error), options.context), { + retryable: true, + cause: error + }) + lastError = downloadError + if (!downloadError.retryable || attempt === retries) throw downloadError + const delayMs = baseDelayMs * 2 ** attempt + onRetry({ attempt: attempt + 1, retries, delayMs, error: downloadError }) + await sleepImpl(delayMs) } + } + + throw lastError ?? new Error(downloadErrorMessage('Unknown error', options.context)) +} + +export async function installVssExtension(argv = process.argv.slice(2), options = {}) { + const args = parseArgs(argv) + const platform = normalizePlatform(args.platform ?? process.platform) + const arch = normalizeArch(args.arch ?? process.arch) + const duckdbVersion = resolveDuckDbVersion(duckdbPackage.version) + const triple = targetTriple(platform, arch) + const repository = String( + args.repository ?? process.env.DUCKDB_EXTENSION_REPOSITORY ?? 
defaultRepository + ).replace(/\/+$/, '') + const url = `${repository}/${duckdbVersion}/${triple}/${extensionName}.gz` + const targetDir = path.join(__dirname, '../runtime/duckdb/extensions') + const targetPath = path.join(targetDir, extensionName) + const tempPath = `${targetPath}.tmp` + + console.log( + `[DuckDB VSS] installing ${extensionName} for ${platform}/${arch} (${triple}), DuckDB ${duckdbVersion}` + ) + console.log(`[DuckDB VSS] source: ${url}`) - const filename = sourcePath.substring(sourcePath.lastIndexOf(path.sep) + 1) - const targetPath = path.join(targetDir, filename) - fs.copyFileSync(sourcePath, targetPath) - console.log('Install duckdb extension successfully.') + try { + const compressed = await downloadExtension(url, { + fetchImpl: options.fetchImpl, + sleep: options.sleep, + retries: options.retries, + baseDelayMs: options.baseDelayMs, + timeoutMs: options.timeoutMs, + context: { duckdbVersion, triple, url }, + onRetry: + options.onRetry ?? + ((retry) => { + console.warn( + `[DuckDB VSS] retry ${retry.attempt}/${retry.retries} in ${retry.delayMs}ms: ${retry.error.message}` + ) + }) + }) + const extension = zlib.gunzipSync(compressed) + validateExtensionMetadata(extension, { duckdbVersion, triple }) + fs.mkdirSync(targetDir, { recursive: true }) + fs.writeFileSync(tempPath, extension) + fs.renameSync(tempPath, targetPath) + console.log(`[DuckDB VSS] installed: ${targetPath}`) } catch (error) { - console.error('Failed to install DuckDB extension:', error.message) - process.exit(1) + fs.rmSync(tempPath, { force: true }) + throw error } } -installVssExtension() +if (process.argv[1] && pathToFileURL(path.resolve(process.argv[1])).href === import.meta.url) { + installVssExtension().catch((error) => { + console.error('[DuckDB VSS] install failed:', error instanceof Error ? 
error.message : error) + process.exit(1) + }) +} diff --git a/scripts/smoke-duckdb-vss.js b/scripts/smoke-duckdb-vss.js index 633fbf0db..5eeb197ea 100644 --- a/scripts/smoke-duckdb-vss.js +++ b/scripts/smoke-duckdb-vss.js @@ -1,28 +1,189 @@ +import fs from 'node:fs' +import os from 'node:os' +import path from 'node:path' import { createRequire } from 'node:module' +import { fileURLToPath, pathToFileURL } from 'node:url' +import zlib from 'node:zlib' const require = createRequire(import.meta.url) const duckdbPackage = require('@duckdb/node-api/package.json') +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) +const extensionName = 'vss.duckdb_extension' + +export function parseArgs(argv) { + const options = {} + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index] + if (arg === '--') continue + if (!arg.startsWith('--')) continue + const [rawKey, inlineValue] = arg.slice(2).split('=', 2) + let value = inlineValue + if (value === undefined) { + const next = argv[index + 1] + if (next === undefined || next === '--' || next.startsWith('--')) { + throw new Error(`Missing value for --${rawKey}`) + } + value = next + index += 1 + } + options[rawKey] = value + } + return options +} + +function normalizePlatform(value) { + switch (value) { + case 'darwin': + case 'mac': + case 'macos': + case 'osx': + return 'darwin' + case 'win32': + case 'windows': + case 'win': + return 'win32' + case 'linux': + return 'linux' + default: + throw new Error(`Unsupported DuckDB VSS platform: ${value}`) + } +} + +function normalizeArch(value) { + switch (value) { + case 'x64': + case 'amd64': + return 'x64' + case 'arm64': + case 'aarch64': + return 'arm64' + default: + throw new Error(`Unsupported DuckDB VSS architecture: ${value}`) + } +} + +function escapeSqlPath(filePath) { + return filePath.replace(/\\/g, '\\\\').replace(/'/g, "''") +} + +function removeMaterializedDirBestEffort(materializedDir) { + try { + 
fs.rmSync(materializedDir, { recursive: true, force: true }) + } catch { + // best effort cleanup only + } +} + +export function materializeBase64Extension(base64Path) { + console.log(`[DuckDB Smoke] extension base64 path: ${base64Path}`) + if (!fs.existsSync(base64Path)) { + throw new Error(`Bundled VSS base64 extension not found at ${base64Path}`) + } + const materializedDir = fs.mkdtempSync(path.join(os.tmpdir(), 'deepchat-duckdb-vss-smoke-')) + const extensionPath = path.join(materializedDir, extensionName) + try { + const compressed = Buffer.from(fs.readFileSync(base64Path, 'utf8'), 'base64') + fs.writeFileSync(extensionPath, zlib.gunzipSync(compressed)) + return { extensionPath, materializedDir } + } catch (error) { + removeMaterializedDirBestEffort(materializedDir) + throw error + } +} + +export function materializeGzipExtension(gzipPath) { + console.log(`[DuckDB Smoke] extension gzip path: ${gzipPath}`) + if (!fs.existsSync(gzipPath)) { + throw new Error(`Bundled VSS gzip extension not found at ${gzipPath}`) + } + const materializedDir = fs.mkdtempSync(path.join(os.tmpdir(), 'deepchat-duckdb-vss-smoke-')) + const extensionPath = path.join(materializedDir, extensionName) + try { + fs.writeFileSync(extensionPath, zlib.gunzipSync(fs.readFileSync(gzipPath))) + return { extensionPath, materializedDir } + } catch (error) { + removeMaterializedDirBestEffort(materializedDir) + throw error + } +} + async function main() { + const args = parseArgs(process.argv.slice(2)) + const platform = args.platform ? normalizePlatform(args.platform) : process.platform + const arch = args.arch ? normalizeArch(args.arch) : process.arch + const extensionBase64Path = args.extensionBase64Path ?? args['extension-base64-path'] + const extensionGzipPath = args.extensionGzipPath ?? args['extension-gzip-path'] + let materializedDir = null + let instance = null + let connection = null + let extensionPath = path.resolve( + args.extensionPath ?? + args['extension-path'] ?? 
+ path.join(__dirname, '../runtime/duckdb/extensions', extensionName) + ) + + if (extensionBase64Path) { + const materialized = materializeBase64Extension(path.resolve(extensionBase64Path)) + extensionPath = materialized.extensionPath + materializedDir = materialized.materializedDir + } else if (extensionGzipPath) { + const materialized = materializeGzipExtension(path.resolve(extensionGzipPath)) + extensionPath = materialized.extensionPath + materializedDir = materialized.materializedDir + } + console.log(`[DuckDB Smoke] package version: ${duckdbPackage.version}`) + console.log(`[DuckDB Smoke] extension path: ${extensionPath}`) - const duckdb = await import('@duckdb/node-api') - const instance = await duckdb.DuckDBInstance.create(':memory:') - const connection = await instance.connect() + if (!fs.existsSync(extensionPath)) { + throw new Error( + `Bundled VSS extension not found at ${extensionPath}. Run pnpm run installRuntime:duckdb:vss first.` + ) + } try { + if (platform !== process.platform || arch !== process.arch) { + console.log( + `[DuckDB Smoke] target ${platform}/${arch} differs from host ${process.platform}/${process.arch}; verified file presence only.` + ) + return + } + + const duckdb = await import('@duckdb/node-api') + instance = await duckdb.DuckDBInstance.create(':memory:') + connection = await instance.connect() + console.log('[DuckDB Smoke] created in-memory instance') - await connection.run('INSTALL vss') - console.log('[DuckDB Smoke] installed vss') - await connection.run('LOAD vss') - console.log('[DuckDB Smoke] loaded vss') + await connection.run(`LOAD '${escapeSqlPath(extensionPath)}';`) + console.log('[DuckDB Smoke] loaded bundled vss by path') + await connection.run('SET hnsw_enable_experimental_persistence = true;') + await connection.run('CREATE TABLE vss_smoke (id INTEGER, embedding FLOAT[2]);') + await connection.run( + "CREATE INDEX idx_vss_smoke ON vss_smoke USING HNSW (embedding) WITH (metric='cosine');" + ) + console.log('[DuckDB 
Smoke] created HNSW index') } finally { - connection.closeSync() - instance.closeSync() + try { + connection?.closeSync() + } catch { + // best effort cleanup only + } + try { + instance?.closeSync() + } catch { + // best effort cleanup only + } + if (materializedDir) { + removeMaterializedDirBestEffort(materializedDir) + } } } -main().catch((error) => { - console.error('[DuckDB Smoke] failed:', error) - process.exit(1) -}) +if (process.argv[1] && pathToFileURL(path.resolve(process.argv[1])).href === import.meta.url) { + main().catch((error) => { + console.error('[DuckDB Smoke] failed:', error) + process.exit(1) + }) +} diff --git a/src/main/lib/agentRuntime/questionTool.ts b/src/main/lib/agentRuntime/questionTool.ts index 979dcecb6..6da42df61 100644 --- a/src/main/lib/agentRuntime/questionTool.ts +++ b/src/main/lib/agentRuntime/questionTool.ts @@ -22,7 +22,7 @@ const questionOptionSchema = z.object({ }) export const questionToolSchema = z - .object({ + .strictObject({ header: z .string() .trim() @@ -59,7 +59,6 @@ export const questionToolSchema = z 'Whether free-form input is allowed for this question. The field name is `custom`, not `allowOther`.' ) }) - .strict() .describe( 'Ask exactly one blocking clarification question. For multiple clarifications, use multiple deepchat_question tool calls instead of sending a `questions` array.' ) diff --git a/src/main/presenter/agentRuntimePresenter/contextBuilder.ts b/src/main/presenter/agentRuntimePresenter/contextBuilder.ts index 1acc6f560..6c1321517 100644 --- a/src/main/presenter/agentRuntimePresenter/contextBuilder.ts +++ b/src/main/presenter/agentRuntimePresenter/contextBuilder.ts @@ -162,11 +162,21 @@ export function normalizeUserInput(input: string | SendMessageInput): SendMessag if (!input || typeof input !== 'object') { return { text: '', files: [] } } + const activeSkills = Array.isArray(input.activeSkills) + ? Array.from( + new Set( + input.activeSkills + .map((skillName) => (typeof skillName === 'string' ? 
skillName.trim() : '')) + .filter((skillName) => skillName.length > 0) + ) + ) + : [] return { text: typeof input.text === 'string' ? input.text : '', files: Array.isArray(input.files) ? (input.files.filter((file): file is MessageFile => Boolean(file)) as MessageFile[]) - : [] + : [], + ...(activeSkills.length > 0 ? { activeSkills } : {}) } } diff --git a/src/main/presenter/agentRuntimePresenter/contextWindowError.ts b/src/main/presenter/agentRuntimePresenter/contextWindowError.ts new file mode 100644 index 000000000..c994b3012 --- /dev/null +++ b/src/main/presenter/agentRuntimePresenter/contextWindowError.ts @@ -0,0 +1,139 @@ +const MAX_ERROR_TEXT_DEPTH = 4 +const MAX_ERROR_TEXT_FIELD_CHARS = 12_000 +const MAX_ERROR_TEXT_TOTAL_CHARS = 48_000 +const MAX_ERROR_ARRAY_ITEMS = 16 + +const STRONG_CONTEXT_WINDOW_ERROR_PATTERNS = [ + 'context window', + 'context length', + 'maximum context', + 'prompt too long' +] + +const TOKEN_CONTEXT_ERROR_PATTERNS = ['token limit', 'too many tokens', 'reduce the length'] +const TOKEN_CONTEXT_HINTS = ['context', 'prompt', 'input', 'request', 'message', 'schema'] +const INPUT_EXCEEDS_CONTEXT_HINTS = ['context', 'prompt', 'request', 'message', 'schema', 'token'] +const CONTEXT_ERROR_TEXT_FIELD_PRIORITY = [ + 'message', + 'error_message', + 'errorMessage', + 'error', + 'errors', + 'detail', + 'details', + 'issues', + 'reason', + 'description', + 'body', + 'response', + 'data', + 'cause' +] + +const NON_CONTEXT_TOKEN_ERROR_PATTERNS = [ + 'rate limit', + 'rate-limit', + 'tokens per minute', + 'token per minute', + 'insufficient quota', + 'monthly limit', + 'daily limit', + 'billing', + 'quota', + '429', + 'tpm', + 'rpm' +] + +export function isContextWindowErrorLike(value: unknown): boolean { + return hasContextWindowErrorText(value, new Set(), 0, { totalChars: 0 }) +} + +function isContextWindowErrorText(text: string): boolean { + const normalized = text.toLowerCase() + if (NON_CONTEXT_TOKEN_ERROR_PATTERNS.some((pattern) => 
normalized.includes(pattern))) { + return false + } + if (STRONG_CONTEXT_WINDOW_ERROR_PATTERNS.some((pattern) => normalized.includes(pattern))) { + return true + } + return ( + (TOKEN_CONTEXT_ERROR_PATTERNS.some((pattern) => normalized.includes(pattern)) && + TOKEN_CONTEXT_HINTS.some((hint) => normalized.includes(hint))) || + (normalized.includes('input exceeds') && + INPUT_EXCEEDS_CONTEXT_HINTS.some((hint) => normalized.includes(hint))) + ) +} + +function hasContextWindowErrorText( + value: unknown, + seen: Set, + depth: number, + state: { totalChars: number } +): boolean { + if (depth > MAX_ERROR_TEXT_DEPTH || state.totalChars >= MAX_ERROR_TEXT_TOTAL_CHARS) { + return false + } + if (typeof value === 'string') { + const remainingChars = MAX_ERROR_TEXT_TOTAL_CHARS - state.totalChars + if (remainingChars <= 0) { + return false + } + const text = value.slice(0, Math.min(MAX_ERROR_TEXT_FIELD_CHARS, remainingChars)) + state.totalChars += text.length + return isContextWindowErrorText(text) + } + if (Array.isArray(value)) { + if (seen.has(value)) { + return false + } + seen.add(value) + for (const item of value.slice(0, MAX_ERROR_ARRAY_ITEMS)) { + if (hasContextWindowErrorText(item, seen, depth, state)) { + return true + } + } + return false + } + if (value instanceof Error) { + if (seen.has(value)) { + return false + } + seen.add(value) + return ( + hasContextWindowErrorText(value.message, seen, depth + 1, state) || + hasContextWindowErrorText(value.name, seen, depth + 1, state) || + hasContextWindowErrorText(value.cause, seen, depth + 1, state) || + hasContextWindowErrorFields(value as unknown as Record, seen, depth, state, [ + 'message', + 'name', + 'cause' + ]) + ) + } + if (!value || typeof value !== 'object' || seen.has(value)) { + return false + } + + seen.add(value) + return hasContextWindowErrorFields(value as Record, seen, depth, state) +} + +function hasContextWindowErrorFields( + record: Record, + seen: Set, + depth: number, + state: { totalChars: number }, 
+ skipKeys: string[] = [] +): boolean { + const skipped = new Set(skipKeys) + for (const key of CONTEXT_ERROR_TEXT_FIELD_PRIORITY) { + if (skipped.has(key)) { + continue + } + if (hasContextWindowErrorText(record[key], seen, depth + 1, state)) { + return true + } + } + return false +} diff --git a/src/main/presenter/agentRuntimePresenter/dispatch.ts b/src/main/presenter/agentRuntimePresenter/dispatch.ts index 300d4ebf4..80c041ca7 100644 --- a/src/main/presenter/agentRuntimePresenter/dispatch.ts +++ b/src/main/presenter/agentRuntimePresenter/dispatch.ts @@ -461,9 +461,9 @@ function extractSkillDraftPromptPayload( return { draftId, skillName } } -function shouldRefreshToolsAfterCall(toolName: string, rawData: MCPToolResponse): boolean { +function extractActivatedSkillAfterCall(toolName: string, rawData: MCPToolResponse): string | null { if (toolName !== 'skill_view') { - return false + return null } const toolResult = @@ -471,7 +471,13 @@ function shouldRefreshToolsAfterCall(toolName: string, rawData: MCPToolResponse) ? (rawData.toolResult as Record) : null - return toolResult?.activationApplied === true + if (toolResult?.activationApplied !== true) { + return null + } + + const activatedSkill = + typeof toolResult.activatedSkill === 'string' ? toolResult.activatedSkill.trim() : '' + return activatedSkill || null } function isParallelReadOnlyToolCall( @@ -977,7 +983,8 @@ async function runToolCall(params: { await toolPresenter.callTool(toolCall, { onProgress: applyProgressUpdate, signal: io.abortSignal, - permissionMode + permissionMode, + activeSkillNames: hooks?.getActiveSkillNames?.() }) let toolCallResult = await callTool() @@ -1061,6 +1068,11 @@ async function runToolCall(params: { preparedResult.kind === 'tool_error' ? 
preparedResult.message : preparedResult.content const stagedIsError = preparedResult.kind === 'tool_error' || toolRawData.isError === true + const activatedSkill = extractActivatedSkillAfterCall(completedToolCall.name, toolRawData) + if (activatedSkill) { + await hooks?.activateSkill?.(activatedSkill) + } + return { kind: 'staged', stagedResult: { @@ -1080,7 +1092,7 @@ async function runToolCall(params: { skillDraftPrompt: extractSkillDraftPromptPayload(toolRawData), postHookKind: stagedIsError ? 'failure' : 'success' }, - toolsChanged: shouldRefreshToolsAfterCall(completedToolCall.name, toolRawData) + toolsChanged: Boolean(activatedSkill) } } catch (err) { return buildToolErrorOutcome(execution, err) diff --git a/src/main/presenter/agentRuntimePresenter/index.ts b/src/main/presenter/agentRuntimePresenter/index.ts index 4e4eeac0f..dbb7a919d 100644 --- a/src/main/presenter/agentRuntimePresenter/index.ts +++ b/src/main/presenter/agentRuntimePresenter/index.ts @@ -43,6 +43,7 @@ import type { RateLimitQueueSnapshot } from '@shared/presenter' import type { MCPToolDefinition } from '@shared/types/core/mcp' +import type { LLMCoreStreamEvent } from '@shared/types/core/llm-events' import type { IToolPresenter } from '@shared/types/presenters/tool.presenter' import type { ReasoningPortrait } from '@shared/types/model-db' import { @@ -95,6 +96,8 @@ import { import { capAgentDefaultMaxTokens, capAgentRequestMaxTokens, + AGENT_CONTEXT_SAFETY_MARGIN_TOKENS, + buildRequestContextBudgetDiagnostics, buildRequestContextOverflowErrorMessage, estimateToolReserveTokens, fitRequestMessagesToContextWindow, @@ -152,6 +155,7 @@ import { insertBlocksAfterToolCall, prepareToolImagePreviewPresentation } from './imageGenerationBlocks' +import { isContextWindowErrorLike } from './contextWindowError' type PendingInteractionEntry = { interaction: PendingToolInteraction @@ -197,6 +201,41 @@ type PackageJsonManifest = { scripts?: Record } +const PROVIDER_OVERFLOW_RETRY_EXTRA_RESERVE_CAP = 8_192 + 
+function getProviderOverflowRetryExtraReserve(contextLength: number): number { + if (!Number.isFinite(contextLength) || contextLength <= 0) { + return 0 + } + return Math.max( + AGENT_CONTEXT_SAFETY_MARGIN_TOKENS, + Math.min(Math.floor(contextLength * 0.1), PROVIDER_OVERFLOW_RETRY_EXTRA_RESERVE_CAP) + ) +} + +function getProviderOverflowRetryMaxTokens(maxTokens: number): number { + const normalized = Number.isFinite(maxTokens) ? Math.floor(maxTokens) : 1 + return Math.max(1, Math.min(normalized, Math.floor(normalized / 2) || 1)) +} + +function isFirstProviderContextOverflowEvent(event: LLMCoreStreamEvent): boolean { + return event.type === 'error' && isContextWindowErrorLike(event.error_message) +} + +function buildProviderContextOverflowAfterRecoveryErrorMessage( + preflight: ReturnType +): string { + const diagnostics = buildRequestContextBudgetDiagnostics(preflight) + const formatTokenCount = (value: number): string => + Number.isFinite(value) ? String(Math.floor(value)) : 'unknown' + + return [ + 'The provider still reported a context overflow after DeepChat compacted or trimmed the request.', + `DeepChat local estimate: usable context ${formatTokenCount(diagnostics.usableContextLength)} tokens, estimated input ${formatTokenCount(diagnostics.inputTokens)} tokens, tool schemas ${formatTokenCount(diagnostics.toolReserveTokens)} tokens, requested output ${formatTokenCount(diagnostics.requestedMaxTokens)} tokens, effective output ${formatTokenCount(diagnostics.effectiveMaxTokens)} tokens, remaining output room ${formatTokenCount(diagnostics.remainingOutputTokens)} tokens.`, + 'The provider may count tokens, system prompts, or tool schemas differently. Try shortening the latest input or attachments, reducing active tools, skills, or system prompt content, lowering max output tokens, or increasing context length.' 
+ ].join(' ') +} + function normalizeTopP(value: unknown): number | undefined { const numeric = parseFiniteNumericValue(value) return numeric !== undefined && numeric >= 0.1 && numeric <= 1 ? numeric : undefined @@ -363,6 +402,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { private readonly sessionProjectDirs: Map = new Map() private readonly systemPromptCache: Map = new Map() private readonly toolProfileCache: Map = new Map() + private readonly runtimeActivatedSkillsBySession: Map> = new Map() private readonly sessionCompactionStates: Map = new Map() private readonly interactionLocks: Set = new Set() private readonly resumingMessages: Set = new Set() @@ -548,6 +588,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { this.sessionProjectDirs.delete(sessionId) this.systemPromptCache.delete(sessionId) this.toolProfileCache.delete(sessionId) + this.runtimeActivatedSkillsBySession.delete(sessionId) this.sessionCompactionStates.delete(sessionId) this.drainingPendingQueues.delete(sessionId) this.toolPresenter?.clearConversationToolMapping?.(sessionId) @@ -887,7 +928,11 @@ export class AgentRuntimePresenter implements IAgentImplementation { const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId) this.logSlowPreStreamStep(sessionId, 'generation-settings', stepStartedAt) const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId) - const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig) + const useContextBudget = this.shouldUseDeepChatContextBudget( + state.providerId, + modelConfig, + state.modelId + ) this.throwIfAbortRequested(preStreamAbortSignal) const interleavedReasoning = this.resolveInterleavedReasoningConfig( state.providerId, @@ -897,17 +942,24 @@ export class AgentRuntimePresenter implements IAgentImplementation { const contextBudgetLength = this.resolveDeepChatContextBudgetLength( state.providerId, 
generationSettings.contextLength, - modelConfig + modelConfig, + state.modelId ) const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, contextBudgetLength) stepStartedAt = Date.now() - const activeSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId) + this.resetRuntimeActivatedSkills(sessionId) + this.setRuntimeActivatedSkills(sessionId, normalizedInput.activeSkills ?? []) + const sessionActiveSkillNames = await this.resolveActiveSkillNamesForToolProfile(sessionId) + const effectiveActiveSkillNames = this.resolveEffectiveActiveSkillNames( + sessionActiveSkillNames, + sessionId + ) this.logSlowPreStreamStep(sessionId, 'active-skills', stepStartedAt) stepStartedAt = Date.now() const tools = await this.loadToolDefinitionsForSession( sessionId, projectDir, - activeSkillNames + effectiveActiveSkillNames ) this.logSlowPreStreamStep(sessionId, 'tool-definitions', stepStartedAt) const toolReserveTokens = estimateToolReserveTokens(tools) @@ -917,7 +969,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { sessionId, generationSettings.systemPrompt, tools, - activeSkillNames + effectiveActiveSkillNames ) this.logSlowPreStreamStep(sessionId, 'system-prompt', stepStartedAt) this.throwIfAbortRequested(preStreamAbortSignal) @@ -928,7 +980,10 @@ export class AgentRuntimePresenter implements IAgentImplementation { files: normalizedInput.files || [], links: [], search: false, - think: false + think: false, + ...(normalizedInput.activeSkills?.length + ? 
{ activeSkills: normalizedInput.activeSkills } + : {}) } let compactionIntent: CompactionIntent | null = null @@ -1056,6 +1111,23 @@ export class AgentRuntimePresenter implements IAgentImplementation { promptPreview: normalizedInput.text, tools, baseSystemPrompt, + refreshSystemPrompt: async (activeSkillNames, refreshedTools) => { + const refreshedBasePrompt = await this.buildSystemPromptWithSkills( + sessionId, + generationSettings.systemPrompt, + refreshedTools, + activeSkillNames ?? effectiveActiveSkillNames + ) + return await this.appendMemoryInjection( + sessionId, + appendReconstructionAnchorStateSection( + appendSummarySection(refreshedBasePrompt, summaryState.summaryText), + this.sessionStore.getReconstructionAnchorPromptState(sessionId) + ), + normalizedInput.text, + userMessageId + ) + }, interleavedReasoning, viewContext: { taskType: 'chat', @@ -1200,6 +1272,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { } } finally { this.clearSessionAbortController(sessionId, preStreamAbortController) + this.resetRuntimeActivatedSkills(sessionId) } } @@ -2836,6 +2909,10 @@ export class AgentRuntimePresenter implements IAgentImplementation { promptPreview?: string interleavedReasoning?: InterleavedReasoningConfig viewContext?: PendingTapeViewContext + refreshSystemPrompt?: ( + activeSkillNames: string[] | undefined, + toolDefinitions: MCPToolDefinition[] + ) => Promise preStreamStartedAt?: number onRunRegistered?: (runId: string) => void }): Promise<{ runId: string; result: ProcessResult }> { @@ -2850,6 +2927,7 @@ export class AgentRuntimePresenter implements IAgentImplementation { promptPreview, interleavedReasoning: providedInterleavedReasoning, viewContext, + refreshSystemPrompt, preStreamStartedAt, onRunRegistered } = args @@ -2945,7 +3023,17 @@ export class AgentRuntimePresenter implements IAgentImplementation { const temperature = generationSettings.temperature const maxTokens = capAgentRequestMaxTokens(generationSettings.maxTokens, 
contextBudgetLength) - const tools = providedTools ?? (await this.loadToolDefinitionsForSession(sessionId, projectDir)) + const streamSessionActiveSkillNames = + await this.resolveActiveSkillNamesForToolProfile(sessionId) + const getEffectiveRuntimeSkillNames = (baseSkillNames = streamSessionActiveSkillNames) => + this.resolveEffectiveActiveSkillNames(baseSkillNames, sessionId) + const tools = + providedTools ?? + (await this.loadToolDefinitionsForSession( + sessionId, + projectDir, + getEffectiveRuntimeSkillNames() + )) const supportsVision = this.supportsVision(state.providerId, state.modelId) const supportsAudioInput = this.supportsAudioInput(state.providerId, state.modelId) @@ -2975,10 +3063,32 @@ export class AgentRuntimePresenter implements IAgentImplementation { projectDir }) + let contextOverflowHandoffAttemptedForRun = false + let strictProviderOverflowRetryUsedForRun = false const result = await processStream({ messages, tools, - refreshTools: async () => await this.loadToolDefinitionsForSession(sessionId, projectDir), + refreshTools: async (activeSkillNames) => + await this.loadToolDefinitionsForSession( + sessionId, + projectDir, + getEffectiveRuntimeSkillNames(activeSkillNames) + ), + refreshSystemPrompt: async (activeSkillNames, refreshedTools) => { + if (refreshSystemPrompt) { + return await refreshSystemPrompt( + getEffectiveRuntimeSkillNames(activeSkillNames), + refreshedTools + ) + } + const refreshedBasePrompt = await this.buildSystemPromptWithSkills( + sessionId, + generationSettings.systemPrompt, + refreshedTools, + getEffectiveRuntimeSkillNames(activeSkillNames) + ) + return refreshedBasePrompt + }, toolPresenter: this.toolPresenter, coreStream: async function* ( requestMessages, @@ -2990,137 +3100,303 @@ export class AgentRuntimePresenter implements IAgentImplementation { ) { const requestBypassesContextBudget = shouldBypassContextBudget( state.providerId, - requestModelConfig + requestModelConfig, + requestModelId ) let queuedForRateLimit = 
false try { - let providerMessages = requestMessages - let providerMaxTokens = requestMaxTokens - let recoveredFromContextPressure = false + let preflightContextRecoveryAttempted = false + let providerOverflowRecoveryAttempted = false + let providerContextOverflowRecoveryApplied = false + let strictProviderOverflowRetryPending = false let manifestSummaryCursorOrderSeq = viewContext?.summaryCursorOrderSeq ?? 1 const isTtsRequest = isTtsModelConfig(requestModelConfig) || isTtsModelId(requestModelId) const effectiveRequestTools: MCPToolDefinition[] = isTtsRequest ? [] : requestTools - - if (!requestBypassesContextBudget) { - let requestPreflight = preflightRequestContext({ - messages: requestMessages, - tools: effectiveRequestTools, - contextLength: requestModelConfig.contextLength, - requestedMaxTokens: requestMaxTokens - }) - if ( - requestPreflight.requiresContextPressureRecovery || - !requestPreflight.fitsWithinContext - ) { - const recovered = await recoverContextPressure({ - sessionId, - providerId: state.providerId, - modelId: requestModelId, - requestMessages: requestPreflight.messages, - baseSystemPrompt, - contextLength: requestModelConfig.contextLength, - requestedMaxTokens: requestPreflight.requestedMaxTokens, - tools: effectiveRequestTools, - supportsVision, - supportsAudioInput, - interleavedReasoning, - minimumProtectedTailCount: 0, - signal: abortController.signal - }) - recoveredFromContextPressure = true - if (recovered.summaryCursorOrderSeq !== undefined) { - manifestSummaryCursorOrderSeq = recovered.summaryCursorOrderSeq - } - requestMessages.splice(0, requestMessages.length, ...recovered.messages) - if (recovered.systemPrompt) { - replaceLeadingSystemPromptInPlace(requestMessages, recovered.systemPrompt) + const effectiveRequestToolReserveTokens = + estimateToolReserveTokens(effectiveRequestTools) + + const prepareProviderAttempt = async (options?: { + strictProviderOverflowRetry?: boolean + }): Promise<{ + providerMessages: ChatMessage[] + 
providerMaxTokens: number + }> => { + let providerMessages = requestMessages + let providerMaxTokens = requestMaxTokens + let manifestRequestedMaxTokens = requestMaxTokens + let manifestReserveTokens = requestMaxTokens + let strictExtraReserveTokens = 0 + let recoveredFromContextPressure = + providerContextOverflowRecoveryApplied || + options?.strictProviderOverflowRetry === true + + if (!requestBypassesContextBudget) { + let requestedMaxTokens = requestMaxTokens + if (options?.strictProviderOverflowRetry) { + strictProviderOverflowRetryUsedForRun = true + requestedMaxTokens = getProviderOverflowRetryMaxTokens(requestMaxTokens) + strictExtraReserveTokens = getProviderOverflowRetryExtraReserve( + requestModelConfig.contextLength + ) + requestMessages.splice( + 0, + requestMessages.length, + ...fitRequestMessagesToContextWindow({ + messages: requestMessages, + contextLength: requestModelConfig.contextLength, + reserveTokens: + requestedMaxTokens + + effectiveRequestToolReserveTokens + + strictExtraReserveTokens, + minimumProtectedTailCount: 0 + }) + ) } - requestPreflight = preflightRequestContext({ + + let requestPreflight = preflightRequestContext({ messages: requestMessages, tools: effectiveRequestTools, contextLength: requestModelConfig.contextLength, - requestedMaxTokens: requestMaxTokens + requestedMaxTokens }) - requestMessages.splice(0, requestMessages.length, ...requestPreflight.messages) + if ( + !options?.strictProviderOverflowRetry && + (requestPreflight.requiresContextPressureRecovery || + !requestPreflight.fitsWithinContext) + ) { + preflightContextRecoveryAttempted = true + recoveredFromContextPressure = true + if (!contextOverflowHandoffAttemptedForRun) { + contextOverflowHandoffAttemptedForRun = true + const recovered = await recoverContextPressure({ + sessionId, + providerId: state.providerId, + modelId: requestModelId, + requestMessages: requestPreflight.messages, + baseSystemPrompt, + contextLength: requestModelConfig.contextLength, + 
requestedMaxTokens: requestPreflight.requestedMaxTokens, + tools: effectiveRequestTools, + supportsVision, + supportsAudioInput, + interleavedReasoning, + minimumProtectedTailCount: 0, + signal: abortController.signal + }) + if (recovered.summaryCursorOrderSeq !== undefined) { + manifestSummaryCursorOrderSeq = recovered.summaryCursorOrderSeq + } + requestMessages.splice(0, requestMessages.length, ...recovered.messages) + if (recovered.systemPrompt) { + replaceLeadingSystemPromptInPlace(requestMessages, recovered.systemPrompt) + } + requestPreflight = preflightRequestContext({ + messages: requestMessages, + tools: effectiveRequestTools, + contextLength: requestModelConfig.contextLength, + requestedMaxTokens + }) + requestMessages.splice(0, requestMessages.length, ...requestPreflight.messages) + } + } + if (!requestPreflight.fitsWithinContext) { + throw new Error(buildRequestContextOverflowErrorMessage(requestPreflight)) + } + providerMessages = requestPreflight.messages + providerMaxTokens = requestPreflight.effectiveMaxTokens + manifestRequestedMaxTokens = requestPreflight.requestedMaxTokens + manifestReserveTokens = + requestPreflight.requestedMaxTokens + strictExtraReserveTokens } - if (!requestPreflight.fitsWithinContext) { - throw new Error(buildRequestContextOverflowErrorMessage(requestPreflight)) + if (providerMessages.length === 0) { + throw new Error('Request was not sent because the prompt became empty.') } - providerMessages = requestPreflight.messages - providerMaxTokens = requestPreflight.effectiveMaxTokens - } - if (providerMessages.length === 0) { - throw new Error('Request was not sent because the prompt became empty.') - } - requestSeq += 1 - const isInitialViewRequest = requestSeq === 1 && Boolean(viewContext) - const manifestPolicy = resolveTapeViewManifestPolicy({ - recoveredFromContextPressure, - isInitialViewRequest, - viewPolicy: viewContext?.policy, - viewPolicyVersion: viewContext?.policyVersion - }) - appendTapeViewManifest({ - sessionId, - 
messageId, - requestSeq, - taskType: isInitialViewRequest ? viewContext!.taskType : 'tool_loop', - policy: manifestPolicy.policy, - policyVersion: manifestPolicy.policyVersion, - messages: providerMessages, - tools: effectiveRequestTools, - tokenBudget: { + const manifestTokenBudget = { contextLength: requestModelConfig.contextLength ?? contextBudgetLength, - requestedMaxTokens: requestMaxTokens, + requestedMaxTokens: manifestRequestedMaxTokens, effectiveMaxTokens: providerMaxTokens, - reserveTokens: requestMaxTokens, - toolReserveTokens: estimateToolReserveTokens(effectiveRequestTools) - }, - providerId: state.providerId, - modelId: requestModelId, - selection: - isInitialViewRequest && !recoveredFromContextPressure - ? viewContext!.selection - : undefined, - summaryCursorOrderSeq: manifestSummaryCursorOrderSeq, - supportsVision: viewContext?.supportsVision ?? supportsVision, - supportsAudioInput: viewContext?.supportsAudioInput ?? supportsAudioInput, - traceDebugEnabled: viewContext?.traceDebugEnabled ?? traceEnabled - }) + reserveTokens: manifestReserveTokens, + toolReserveTokens: effectiveRequestToolReserveTokens + } - await llmProviderPresenter.executeWithRateLimit(state.providerId, { - signal: abortController.signal, - onQueued: (snapshot) => { - queuedForRateLimit = true - emitRateLimitWaitingMessage( - sessionId, - rateLimitMessageId, - activeGeneration.runId, - snapshot - ) + requestSeq += 1 + const isInitialViewRequest = requestSeq === 1 && Boolean(viewContext) + const manifestPolicy = resolveTapeViewManifestPolicy({ + recoveredFromContextPressure, + isInitialViewRequest, + viewPolicy: viewContext?.policy, + viewPolicyVersion: viewContext?.policyVersion + }) + appendTapeViewManifest({ + sessionId, + messageId, + requestSeq, + taskType: isInitialViewRequest ? 
viewContext!.taskType : 'tool_loop', + policy: manifestPolicy.policy, + policyVersion: manifestPolicy.policyVersion, + messages: providerMessages, + tools: effectiveRequestTools, + tokenBudget: manifestTokenBudget, + providerId: state.providerId, + modelId: requestModelId, + selection: + isInitialViewRequest && !recoveredFromContextPressure + ? viewContext!.selection + : undefined, + summaryCursorOrderSeq: manifestSummaryCursorOrderSeq, + supportsVision: viewContext?.supportsVision ?? supportsVision, + supportsAudioInput: viewContext?.supportsAudioInput ?? supportsAudioInput, + traceDebugEnabled: viewContext?.traceDebugEnabled ?? traceEnabled + }) + + return { providerMessages, providerMaxTokens } + } + + const recoverProviderContextOverflow = async ( + providerMessages: ChatMessage[], + providerMaxTokens: number + ): Promise => { + contextOverflowHandoffAttemptedForRun = true + providerOverflowRecoveryAttempted = true + const recovered = await recoverContextPressure({ + sessionId, + providerId: state.providerId, + modelId: requestModelId, + requestMessages: providerMessages, + baseSystemPrompt, + contextLength: requestModelConfig.contextLength, + requestedMaxTokens: providerMaxTokens, + tools: effectiveRequestTools, + supportsVision, + supportsAudioInput, + interleavedReasoning, + minimumProtectedTailCount: 0, + signal: abortController.signal + }) + if (recovered.summaryCursorOrderSeq !== undefined) { + manifestSummaryCursorOrderSeq = recovered.summaryCursorOrderSeq } - }) - if (queuedForRateLimit) { - clearRateLimitWaitingMessage(sessionId, rateLimitMessageId, activeGeneration.runId) - queuedForRateLimit = false + providerContextOverflowRecoveryApplied = true + strictProviderOverflowRetryPending = recovered.summaryCursorOrderSeq === undefined + requestMessages.splice(0, requestMessages.length, ...recovered.messages) + if (recovered.systemPrompt) { + replaceLeadingSystemPromptInPlace(requestMessages, recovered.systemPrompt) + } + } + + const 
buildProviderOverflowRetryFailure = ( + providerMessages: ChatMessage[], + providerMaxTokens: number + ): Error => { + const retryPreflight = preflightRequestContext({ + messages: providerMessages, + tools: effectiveRequestTools, + contextLength: requestModelConfig.contextLength, + requestedMaxTokens: providerMaxTokens + }) + return new Error( + retryPreflight.fitsWithinContext + ? buildProviderContextOverflowAfterRecoveryErrorMessage(retryPreflight) + : buildRequestContextOverflowErrorMessage(retryPreflight) + ) } - if (abortController.signal.aborted) { - throw createAbortError() + + const scheduleStrictProviderOverflowRetry = (): boolean => { + if (strictProviderOverflowRetryUsedForRun || strictProviderOverflowRetryPending) { + return false + } + strictProviderOverflowRetryPending = true + return true } - logPreStreamBoundary() - for await (const event of provider.coreStream( - providerMessages, - requestModelId, - requestModelConfig, - requestTemperature, - providerMaxTokens, - effectiveRequestTools - )) { - yield event + providerAttemptLoop: for (;;) { + const strictProviderOverflowRetry = strictProviderOverflowRetryPending + strictProviderOverflowRetryPending = false + const { providerMessages, providerMaxTokens } = await prepareProviderAttempt({ + strictProviderOverflowRetry + }) + + await llmProviderPresenter.executeWithRateLimit(state.providerId, { + signal: abortController.signal, + onQueued: (snapshot) => { + queuedForRateLimit = true + emitRateLimitWaitingMessage( + sessionId, + rateLimitMessageId, + activeGeneration.runId, + snapshot + ) + } + }) + if (queuedForRateLimit) { + clearRateLimitWaitingMessage(sessionId, rateLimitMessageId, activeGeneration.runId) + queuedForRateLimit = false + } + if (abortController.signal.aborted) { + throw createAbortError() + } + + logPreStreamBoundary() + let yieldedProviderEvent = false + try { + for await (const event of provider.coreStream( + providerMessages, + requestModelId, + requestModelConfig, + 
requestTemperature, + providerMaxTokens, + effectiveRequestTools + )) { + if ( + !yieldedProviderEvent && + !requestBypassesContextBudget && + isFirstProviderContextOverflowEvent(event) + ) { + if ( + strictProviderOverflowRetryUsedForRun || + providerOverflowRecoveryAttempted + ) { + throw buildProviderOverflowRetryFailure(providerMessages, providerMaxTokens) + } + if ( + preflightContextRecoveryAttempted || + contextOverflowHandoffAttemptedForRun + ) { + if (!scheduleStrictProviderOverflowRetry()) { + throw buildProviderOverflowRetryFailure(providerMessages, providerMaxTokens) + } + continue providerAttemptLoop + } + await recoverProviderContextOverflow(providerMessages, providerMaxTokens) + continue providerAttemptLoop + } + yieldedProviderEvent = true + yield event + } + break + } catch (error) { + if ( + !yieldedProviderEvent && + !requestBypassesContextBudget && + isContextWindowErrorLike(error) + ) { + if (strictProviderOverflowRetryUsedForRun || providerOverflowRecoveryAttempted) { + throw buildProviderOverflowRetryFailure(providerMessages, providerMaxTokens) + } + if (preflightContextRecoveryAttempted || contextOverflowHandoffAttemptedForRun) { + if (!scheduleStrictProviderOverflowRetry()) { + throw buildProviderOverflowRetryFailure(providerMessages, providerMaxTokens) + } + continue providerAttemptLoop + } + await recoverProviderContextOverflow(providerMessages, providerMaxTokens) + continue providerAttemptLoop + } + throw error + } } } catch (error) { if (queuedForRateLimit) { @@ -3149,6 +3425,11 @@ export class AgentRuntimePresenter implements IAgentImplementation { shouldYieldForPendingInput: () => Boolean(this.pendingInputCoordinator.getNextSteerInput(sessionId)), hooks: { + getActiveSkillNames: () => getEffectiveRuntimeSkillNames(), + activateSkill: async (skillName) => { + await this.activateRuntimeSkill(sessionId, skillName) + return getEffectiveRuntimeSkillNames() + }, onPreToolUse: (tool) => { this.dispatchHook('PreToolUse', { sessionId, @@ 
-3712,7 +3993,11 @@ export class AgentRuntimePresenter implements IAgentImplementation { this.throwIfAbortRequested(preStreamAbortSignal) const generationSettings = await this.getEffectiveSessionGenerationSettings(sessionId) const modelConfig = this.configPresenter.getModelConfig(state.modelId, state.providerId) - const useContextBudget = this.shouldUseDeepChatContextBudget(state.providerId, modelConfig) + const useContextBudget = this.shouldUseDeepChatContextBudget( + state.providerId, + modelConfig, + state.modelId + ) this.throwIfAbortRequested(preStreamAbortSignal) const interleavedReasoning = this.resolveInterleavedReasoningConfig( state.providerId, @@ -4198,13 +4483,13 @@ export class AgentRuntimePresenter implements IAgentImplementation { 'Before replying, always scan available skills. If any skill plausibly matches the task, call `skill_view` first.' ) lines.push( - 'Viewing a skill root `SKILL.md` pins it to the current conversation; viewing linked skill files is read-only and does not pin the skill.' + 'Viewing a skill root `SKILL.md` activates that skill for the current message/tool loop; it does not pin the skill to the conversation. Viewing linked skill files is read-only and does not activate the skill.' ) hasContent = true } if (capabilities.canRunSkillScripts) { lines.push( - 'Use `skill_run` only for pinned skills when a pinned skill provides bundled helper scripts.' + 'Use `skill_run` only for skills that are active in the current message/tool loop, including manually pinned skills and skills activated by `skill_view`.' ) hasContent = true } @@ -4252,13 +4537,58 @@ export class AgentRuntimePresenter implements IAgentImplementation { return '' } return [ - '## Pinned Skills', - 'These pinned skills are preloaded for this conversation. Follow them when relevant.', + '## Active Skills', + 'These skills are active for the current message context. 
Some may be manually pinned for the conversation; others may have been activated by `skill_view` for this message/tool loop only. Follow them when relevant.', '', skillSections.join('\n\n') ].join('\n') } + private resetRuntimeActivatedSkills(sessionId: string): void { + this.runtimeActivatedSkillsBySession.delete(sessionId) + } + + private setRuntimeActivatedSkills(sessionId: string, skillNames: string[]): void { + const normalizedSkillNames = this.normalizeSkillNames(skillNames) + if (normalizedSkillNames.length === 0) { + return + } + this.runtimeActivatedSkillsBySession.set(sessionId, new Set(normalizedSkillNames)) + } + + private getRuntimeActivatedSkills(sessionId: string): string[] { + return this.normalizeSkillNames( + Array.from(this.runtimeActivatedSkillsBySession.get(sessionId) ?? []) + ) + } + + private async activateRuntimeSkill(sessionId: string, skillName: string): Promise { + const normalizedSkillName = skillName.trim() + if (!normalizedSkillName) { + return this.getRuntimeActivatedSkills(sessionId) + } + + let activeSkills = this.runtimeActivatedSkillsBySession.get(sessionId) + if (!activeSkills) { + activeSkills = new Set() + this.runtimeActivatedSkillsBySession.set(sessionId, activeSkills) + } + activeSkills.add(normalizedSkillName) + this.invalidateSystemPromptCache(sessionId) + this.invalidateToolProfileCache(sessionId) + return this.getRuntimeActivatedSkills(sessionId) + } + + private resolveEffectiveActiveSkillNames( + sessionActiveSkillNames: string[], + sessionId: string + ): string[] { + return this.normalizeSkillNames([ + ...sessionActiveSkillNames, + ...this.getRuntimeActivatedSkills(sessionId) + ]) + } + private normalizeSkillNames(skillNames: string[]): string[] { return Array.from( new Set(skillNames.map((name) => name.trim()).filter((name) => name.length > 0)) @@ -5060,7 +5390,16 @@ export class AgentRuntimePresenter implements IAgentImplementation { const files = Array.isArray((parsed as { files?: unknown }).files) ? 
((parsed as { files?: unknown }).files as MessageFile[]).filter((file) => Boolean(file)) : [] - return { text, files } + const activeSkills = this.normalizeSkillNames( + Array.isArray((parsed as { activeSkills?: unknown }).activeSkills) + ? ((parsed as { activeSkills?: unknown }).activeSkills as string[]) + : [] + ) + return { + text, + files, + ...(activeSkills.length > 0 ? { activeSkills } : {}) + } } catch { return { text: content, files: [] } } @@ -5077,7 +5416,14 @@ export class AgentRuntimePresenter implements IAgentImplementation { const files = Array.isArray(input.files) ? input.files.filter((file): file is MessageFile => Boolean(file)) : [] - return { text, files } + const activeSkills = this.normalizeSkillNames( + Array.isArray(input.activeSkills) ? input.activeSkills : [] + ) + return { + text, + files, + ...(activeSkills.length > 0 ? { activeSkills } : {}) + } } private queueVisibleSteerInput( @@ -5828,7 +6174,8 @@ export class AgentRuntimePresenter implements IAgentImplementation { disabledAgentTools: this.getDisabledAgentTools(sessionId), chatMode: 'agent', conversationId: sessionId, - agentWorkspacePath: projectDir + agentWorkspacePath: projectDir, + activeSkillNames: activeSkillNamesOverride }) this.toolProfileCache.set(sessionId, { diff --git a/src/main/presenter/agentRuntimePresenter/messageStore.ts b/src/main/presenter/agentRuntimePresenter/messageStore.ts index 27f97da5b..78cea62b6 100644 --- a/src/main/presenter/agentRuntimePresenter/messageStore.ts +++ b/src/main/presenter/agentRuntimePresenter/messageStore.ts @@ -740,12 +740,15 @@ export class DeepChatMessageStore { maps?.linkRows.get(row.id) ?? this.sqlitePresenter.deepchatUserMessageLinksTable.listByMessageIds([row.id]) + const rawUserContent = this.parseUserContent(row.content) + const activeSkills = rawUserContent?.activeSkills ?? 
[] return JSON.stringify({ text: userRow.text, files: fileRows.map((fileRow) => this.toMessageFile(fileRow)), links: linkRows.map((linkRow) => linkRow.url), search: userRow.search_enabled === 1, - think: userRow.think_enabled === 1 + think: userRow.think_enabled === 1, + ...(activeSkills.length > 0 ? { activeSkills } : {}) } satisfies UserMessageContent) } @@ -782,13 +785,29 @@ export class DeepChatMessageStore { ? parsed.links.filter((item): item is string => typeof item === 'string') : [], search: parsed.search === true, - think: parsed.think === true + think: parsed.think === true, + activeSkills: this.normalizeActiveSkills(parsed.activeSkills) } } catch { return null } } + private normalizeActiveSkills(activeSkills?: string[]): string[] { + if (!Array.isArray(activeSkills)) { + return [] + } + + return Array.from( + new Set( + activeSkills + .filter((item): item is string => typeof item === 'string') + .map((item) => item.trim()) + .filter(Boolean) + ) + ) + } + private buildCompactionBlocks(status: 'compacting' | 'compacted'): AssistantMessageBlock[] { return [ { diff --git a/src/main/presenter/agentRuntimePresenter/pendingInputCoordinator.ts b/src/main/presenter/agentRuntimePresenter/pendingInputCoordinator.ts index 0ce64bdcb..b33575ae3 100644 --- a/src/main/presenter/agentRuntimePresenter/pendingInputCoordinator.ts +++ b/src/main/presenter/agentRuntimePresenter/pendingInputCoordinator.ts @@ -13,9 +13,20 @@ function normalizeInput(input: string | SendMessageInput): SendMessageInput { return { text: input, files: [] } } + const activeSkills = Array.isArray(input?.activeSkills) + ? Array.from( + new Set( + input.activeSkills + .map((skillName) => (typeof skillName === 'string' ? skillName.trim() : '')) + .filter((skillName) => skillName.length > 0) + ) + ) + : [] + return { text: typeof input?.text === 'string' ? input.text : '', - files: Array.isArray(input?.files) ? input.files.filter(Boolean) : [] + files: Array.isArray(input?.files) ? 
input.files.filter(Boolean) : [], + ...(activeSkills.length > 0 ? { activeSkills } : {}) } } diff --git a/src/main/presenter/agentRuntimePresenter/pendingInputStore.ts b/src/main/presenter/agentRuntimePresenter/pendingInputStore.ts index 9621ca4e9..893e09392 100644 --- a/src/main/presenter/agentRuntimePresenter/pendingInputStore.ts +++ b/src/main/presenter/agentRuntimePresenter/pendingInputStore.ts @@ -12,9 +12,20 @@ function normalizeInput(input: string | SendMessageInput): SendMessageInput { return { text: input, files: [] } } + const activeSkills = Array.isArray(input?.activeSkills) + ? Array.from( + new Set( + input.activeSkills + .map((skillName) => (typeof skillName === 'string' ? skillName.trim() : '')) + .filter((skillName) => skillName.length > 0) + ) + ) + : [] + return { text: typeof input?.text === 'string' ? input.text : '', - files: Array.isArray(input?.files) ? input.files.filter(Boolean) : [] + files: Array.isArray(input?.files) ? input.files.filter(Boolean) : [], + ...(activeSkills.length > 0 ? { activeSkills } : {}) } } @@ -108,8 +119,15 @@ export class DeepChatPendingInputStore { const next = normalizeInput(input) const text = [existing.text.trim(), next.text.trim()].filter(Boolean).join('\n\n') const files = [...(existing.files ?? []), ...(next.files ?? [])].filter(Boolean) + const activeSkills = Array.from( + new Set([...(existing.activeSkills ?? []), ...(next.activeSkills ?? [])]) + ) this.sqlitePresenter.deepchatPendingInputsTable.update(itemId, { - payload_json: JSON.stringify({ text, files }) + payload_json: JSON.stringify({ + text, + files, + ...(activeSkills.length > 0 ? 
{ activeSkills } : {}) + }) }) return this.toRecord(this.requireRow(itemId, row.session_id)) } diff --git a/src/main/presenter/agentRuntimePresenter/process.ts b/src/main/presenter/agentRuntimePresenter/process.ts index 6a58dd101..db5b07514 100644 --- a/src/main/presenter/agentRuntimePresenter/process.ts +++ b/src/main/presenter/agentRuntimePresenter/process.ts @@ -18,17 +18,10 @@ import { finalizePaused, persistAbortExceptionPlanState } from './dispatch' +import { isContextWindowErrorLike } from './contextWindowError' const MAX_TOOL_CALLS = 128 const UNKNOWN_CONTEXT_LIMIT = Number.MAX_SAFE_INTEGER -const CONTEXT_WINDOW_ERROR_PATTERNS = [ - 'context length', - 'context window', - 'too many tokens', - 'prompt too long', - 'maximum context length', - 'reduce the length' -] const USER_CANCELED_GENERATION_ERROR = 'common.error.userCanceledGeneration' const NO_MODEL_RESPONSE_ERROR = 'common.error.noModelResponse' type PendingPermissionPayload = NonNullable @@ -38,11 +31,6 @@ function isAbortError(error: unknown): boolean { return error instanceof Error && (error.name === 'AbortError' || error.name === 'CanceledError') } -function isContextWindowErrorMessage(message: string): boolean { - const normalized = message.toLowerCase() - return CONTEXT_WINDOW_ERROR_PATTERNS.some((pattern) => normalized.includes(pattern)) -} - function getLatestErrorMessage(state: StreamState): string | null { for (let index = state.blocks.length - 1; index >= 0; index -= 1) { const block = state.blocks[index] @@ -276,6 +264,22 @@ function appendStreamingProviderPermissionBlock( } } +function replaceLeadingSystemMessage( + messages: ProcessParams['messages'], + systemPrompt: string +): void { + if (!systemPrompt) { + return + } + + if (messages[0]?.role === 'system') { + messages[0] = { ...messages[0], content: systemPrompt } + return + } + + messages.unshift({ role: 'system', content: systemPrompt }) +} + function markStreamingProviderPermissionResolved( block: AssistantMessageBlock, granted: 
boolean, @@ -485,11 +489,28 @@ export async function processStream(params: ProcessParams): Promise void ) => void + getActiveSkillNames?: () => string[] + activateSkill?: (skillName: string) => Promise normalizeToolResult?: (tool: { sessionId: string toolCallId: string @@ -162,7 +164,11 @@ export interface ProcessResult { export interface ProcessParams { messages: ChatMessage[] tools: MCPToolDefinition[] - refreshTools?: () => Promise + refreshTools?: (activeSkillNames?: string[]) => Promise + refreshSystemPrompt?: ( + activeSkillNames: string[] | undefined, + toolDefinitions: MCPToolDefinition[] + ) => Promise toolPresenter: IToolPresenter | null coreStream: ( messages: ChatMessage[], diff --git a/src/main/presenter/agentSessionPresenter/index.ts b/src/main/presenter/agentSessionPresenter/index.ts index d4433b749..4ed6e2033 100644 --- a/src/main/presenter/agentSessionPresenter/index.ts +++ b/src/main/presenter/agentSessionPresenter/index.ts @@ -406,10 +406,6 @@ export class AgentSessionPresenter { webContentsId }) - if (input.activeSkills && input.activeSkills.length > 0 && this.skillPresenter) { - await this.skillPresenter.setActiveSkills(sessionId, input.activeSkills) - } - // Return enriched session first const state = await agent.getSessionState(sessionId) const sessionResult: SessionWithState = { @@ -437,17 +433,29 @@ export class AgentSessionPresenter { logger.info(`[AgentSessionPresenter] firing queuePendingInput (non-blocking)`) if (agent.queuePendingInput) { agent - .queuePendingInput(sessionId, normalizedInput, { - source: 'send', - projectDir - }) + .queuePendingInput( + sessionId, + this.withInitialMessageActiveSkills(normalizedInput, input.activeSkills), + { + source: 'send', + projectDir + } + ) .catch((err) => { console.error('[AgentSessionPresenter] queuePendingInput failed:', err) }) } else { - agent.processMessage(sessionId, normalizedInput, { projectDir }).catch((err) => { - console.error('[AgentSessionPresenter] processMessage failed:', err) - 
}) + agent + .processMessage( + sessionId, + this.withInitialMessageActiveSkills(normalizedInput, input.activeSkills), + { + projectDir + } + ) + .catch((err) => { + console.error('[AgentSessionPresenter] processMessage failed:', err) + }) } void this.generateSessionTitle(sessionId, title, providerId, modelId) } @@ -3503,7 +3511,9 @@ export class AgentSessionPresenter { try { const db = this.sqlitePresenter.getDatabase() - const sessionRows = db.prepare('SELECT id FROM new_sessions ORDER BY updated_at ASC').all() as + const sessionRows = db + .prepare('SELECT id FROM new_sessions ORDER BY updated_at ASC') + .all() as | Array<{ id: string }> @@ -3649,7 +3659,8 @@ export class AgentSessionPresenter { ? parsed.links.filter((item): item is string => typeof item === 'string') : [], search: parsed.search === true, - think: parsed.think === true + think: parsed.think === true, + activeSkills: this.normalizeActiveSkills(parsed.activeSkills) } } catch { return null @@ -3956,7 +3967,12 @@ export class AgentSessionPresenter { const files = Array.isArray(content.files) ? content.files.filter((file): file is MessageFile => Boolean(file)) : [] - return { text, files } + const activeSkills = this.normalizeActiveSkills(content.activeSkills) + return { + text, + files, + ...(activeSkills.length > 0 ? { activeSkills } : {}) + } } private normalizeCreateSessionInput(input: CreateSessionInput): SendMessageInput { @@ -3964,7 +3980,18 @@ export class AgentSessionPresenter { const files = Array.isArray(input.files) ? input.files.filter((file): file is MessageFile => Boolean(file)) : [] - return { text, files } + return this.withInitialMessageActiveSkills({ text, files }, input.activeSkills) + } + + private withInitialMessageActiveSkills( + input: SendMessageInput, + activeSkills?: string[] + ): SendMessageInput { + const normalizedActiveSkills = this.normalizeActiveSkills(activeSkills ?? input.activeSkills) + return { + ...input, + ...(normalizedActiveSkills.length > 0 ? 
{ activeSkills: normalizedActiveSkills } : {}) + } } private normalizeDisabledAgentTools( diff --git a/src/main/presenter/browser/YoBrowserToolDefinitions.ts b/src/main/presenter/browser/YoBrowserToolDefinitions.ts index 40b373509..314bbfad6 100644 --- a/src/main/presenter/browser/YoBrowserToolDefinitions.ts +++ b/src/main/presenter/browser/YoBrowserToolDefinitions.ts @@ -1,11 +1,11 @@ import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import type { MCPToolDefinition } from '@shared/presenter' const yoBrowserSchemas = { get_browser_status: z.object({}), load_url: z.object({ - url: z.string().url().describe('URL to load in the session browser') + url: z.url().describe('URL to load in the session browser') }), cdp_send: z.object({ method: z @@ -32,7 +32,7 @@ const yoBrowserSchemas = { export const YO_BROWSER_TOOL_NAMES = ['load_url', 'get_browser_status', 'cdp_send'] as const function asParameters(schema: z.ZodTypeAny) { - return zodToJsonSchema(schema) as { + return toDeepChatJsonSchema(schema) as { type: string properties: Record required?: string[] diff --git a/src/main/presenter/configPresenter/configDbStores.ts b/src/main/presenter/configPresenter/configDbStores.ts index 51314a70a..1fab300a2 100644 --- a/src/main/presenter/configPresenter/configDbStores.ts +++ b/src/main/presenter/configPresenter/configDbStores.ts @@ -13,7 +13,8 @@ export const SENSITIVE_APP_SETTING_KEYS = [ 'hooksNotifications', 'knowledgeConfigs', 'customPrompts', - 'systemPrompts' + 'systemPrompts', + 'skills.managementState' ] as const const SENSITIVE_APP_SETTING_KEY_SET = new Set(SENSITIVE_APP_SETTING_KEYS) diff --git a/src/main/presenter/configPresenter/providerModelHelper.ts b/src/main/presenter/configPresenter/providerModelHelper.ts index daa028866..4cf7efe07 100644 --- a/src/main/presenter/configPresenter/providerModelHelper.ts +++ b/src/main/presenter/configPresenter/providerModelHelper.ts @@ 
-1,6 +1,11 @@ import logger from '@shared/logger' import { ModelConfig, MODEL_META } from '@shared/presenter' -import { ModelType } from '@shared/model' +import { + isNewApiEndpointType, + ModelType, + resolveNewApiModelTypeFromMetadata, + resolveNewApiSelectableEndpointTypes +} from '@shared/model' import { resolveVideoGenerationCompatType } from '@shared/videoGenerationSettings' import ElectronStore from 'electron-store' import path from 'path' @@ -30,6 +35,16 @@ interface ProviderModelHelperOptions { type ProviderModelStore = StoreLike> +const MODEL_TYPE_VALUES = new Set(Object.values(ModelType)) + +function isModelType(value: unknown): value is ModelType { + return typeof value === 'string' && MODEL_TYPE_VALUES.has(value) +} + +function isNonChatModelType(type: ModelType | undefined): type is ModelType { + return type !== undefined && type !== ModelType.Chat +} + export class ProviderModelHelper { private readonly userDataPath: string private readonly getModelConfig: ModelConfigResolver @@ -127,6 +142,38 @@ export class ProviderModelHelper { return normalizedModel } + private resolveNewApiEffectiveModelType(model: MODEL_META, config?: ModelConfig): ModelType { + const userConfigType = + config?.isUserDefined === true && isModelType(config.type) ? config.type : undefined + if (userConfigType) { + return userConfigType + } + + if (isModelType(model.type)) { + return model.type + } + + const supportedEndpointTypes = (model.supportedEndpointTypes ?? []).filter(isNewApiEndpointType) + const routeEndpointTypes = + supportedEndpointTypes.length > 0 + ? supportedEndpointTypes + : isNewApiEndpointType(model.endpointType) + ? [model.endpointType] + : [] + const metadataType = resolveNewApiModelTypeFromMetadata(routeEndpointTypes, model.id, undefined) + if (metadataType) { + return metadataType + } + + const providerConfigType = + config?.isUserDefined !== true && isModelType(config?.type) ? 
config.type : undefined + if (isNonChatModelType(providerConfigType)) { + return providerConfigType + } + + return ModelType.Chat + } + private applyResolvedModelConfig(model: MODEL_META, providerId: string): MODEL_META { const normalizedModel = this.cloneModel(model) const config = this.getModelConfig(normalizedModel.id, providerId) @@ -146,6 +193,11 @@ export class ProviderModelHelper { : config.reasoning || false normalizedModel.endpointType = config.endpointType ?? normalizedModel.endpointType normalizedModel.ownedBy = normalizedModel.ownedBy ?? config.ownedBy + if (providerId === 'new-api') { + normalizedModel.type = this.resolveNewApiEffectiveModelType(normalizedModel, config) + return normalizedModel + } + normalizedModel.type = resolveVideoGenerationCompatType({ modelId: normalizedModel.id, @@ -161,6 +213,11 @@ export class ProviderModelHelper { normalizedModel.vision = normalizedModel.vision || false normalizedModel.functionCall = normalizedModel.functionCall || false normalizedModel.reasoning = normalizedModel.reasoning || false + if (providerId === 'new-api') { + normalizedModel.type = this.resolveNewApiEffectiveModelType(normalizedModel) + return normalizedModel + } + normalizedModel.type = resolveVideoGenerationCompatType({ modelId: normalizedModel.id, @@ -172,6 +229,21 @@ export class ProviderModelHelper { return normalizedModel } + private applyNewApiEndpointCompatibility(model: MODEL_META, providerId: string): MODEL_META { + if (providerId !== 'new-api') { + return model + } + + const selectableEndpointTypes = resolveNewApiSelectableEndpointTypes( + model.supportedEndpointTypes, + model.id, + { + type: model.type + } + ) + return selectableEndpointTypes ? 
{ ...model, selectableEndpointTypes } : model + } + getProviderModels(providerId: string): MODEL_META[] { const cached = this.providerModelsCache.get(providerId) if (cached && cached.expiresAt > Date.now()) { @@ -201,7 +273,10 @@ export class ProviderModelHelper { } const result = normalizedStoredModels.map((model) => - this.applyResolvedModelConfig(model, providerId) + this.applyNewApiEndpointCompatibility( + this.applyResolvedModelConfig(model, providerId), + providerId + ) ) this.providerModelsCache.set(providerId, { diff --git a/src/main/presenter/deeplinkPresenter/index.ts b/src/main/presenter/deeplinkPresenter/index.ts index a0a05d674..c63624f9a 100644 --- a/src/main/presenter/deeplinkPresenter/index.ts +++ b/src/main/presenter/deeplinkPresenter/index.ts @@ -384,13 +384,14 @@ export class DeeplinkPresenter implements IDeeplinkPresenter { return } - const settingsWindowId = await presenter.windowPresenter.createSettingsWindow() - if (!settingsWindowId) { - console.error('Failed to open Settings window for MCP install deeplink') + const targetWindow = await this.resolveChatWindow() + if (!targetWindow) { + console.error('Failed to resolve main window for MCP install deeplink') return } - presenter.windowPresenter.sendToWindow(settingsWindowId, DEEPLINK_EVENTS.MCP_INSTALL, { + await this.ensureChatWindowReady(targetWindow.id) + presenter.windowPresenter.sendToWindow(targetWindow.id, DEEPLINK_EVENTS.MCP_INSTALL, { mcpConfig: JSON.stringify(completeMcpConfig) }) diff --git a/src/main/presenter/hooksNotifications/config.ts b/src/main/presenter/hooksNotifications/config.ts index 49617f68a..d1323ca3c 100644 --- a/src/main/presenter/hooksNotifications/config.ts +++ b/src/main/presenter/hooksNotifications/config.ts @@ -9,21 +9,17 @@ import { HooksNotificationsSettings } from '@shared/hooksNotifications' -const HookCommandItemSchema = z - .object({ - id: z.unknown().optional(), - name: z.unknown().optional(), - enabled: z.unknown().optional(), - command: 
z.unknown().optional(), - events: z.array(z.string()).optional() - }) - .strip() - -const HooksNotificationsSchema = z - .object({ - hooks: z.array(z.unknown()).optional() - }) - .strip() +const HookCommandItemSchema = z.object({ + id: z.unknown().optional(), + name: z.unknown().optional(), + enabled: z.unknown().optional(), + command: z.unknown().optional(), + events: z.array(z.string()).optional() +}) + +const HooksNotificationsSchema = z.object({ + hooks: z.array(z.unknown()).optional() +}) type LooseHookCommandItem = z.infer diff --git a/src/main/presenter/index.ts b/src/main/presenter/index.ts index 58f3c3fa7..99a84daf5 100644 --- a/src/main/presenter/index.ts +++ b/src/main/presenter/index.ts @@ -574,6 +574,8 @@ export class Presenter implements IPresenter { isManagedAgent: (agentId) => agentRepository.getDeepChatAgentConfig(agentId) !== null, getEmbeddings: (providerId, modelId, texts) => this.llmproviderPresenter.getEmbeddings(providerId, modelId, texts), + getDimensions: (providerId, modelId) => + this.llmproviderPresenter.getDimensions(providerId, modelId), generateText: async (providerId, modelId, prompt) => (await this.llmproviderPresenter.generateText(providerId, prompt, modelId, 0.2)).content ?? 
'', diff --git a/src/main/presenter/llmProviderPresenter/aiSdk/providerFactory.ts b/src/main/presenter/llmProviderPresenter/aiSdk/providerFactory.ts index 43f43db60..e32a2fcf6 100644 --- a/src/main/presenter/llmProviderPresenter/aiSdk/providerFactory.ts +++ b/src/main/presenter/llmProviderPresenter/aiSdk/providerFactory.ts @@ -8,7 +8,7 @@ import { wrapLanguageModel } from 'ai' import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { createAnthropic } from '@ai-sdk/anthropic' import { createAzure } from '@ai-sdk/azure' -import { createGoogleGenerativeAI } from '@ai-sdk/google' +import { createGoogle } from '@ai-sdk/google' import { createVertex } from '@ai-sdk/google-vertex' import { createOpenAI } from '@ai-sdk/openai' import { createOpenAICompatible } from '@ai-sdk/openai-compatible' @@ -627,7 +627,7 @@ export function createAiSdkProviderContext( case 'gemini': { const geminiBaseUrl = normalizeGeminiBaseUrl(baseUrl || undefined) - const provider = createGoogleGenerativeAI({ + const provider = createGoogle({ baseURL: geminiBaseUrl, apiKey: params.provider.apiKey || process.env.GEMINI_API_KEY, headers: params.defaultHeaders, @@ -707,8 +707,8 @@ export function createAiSdkProviderContext( providerOptionsKey: 'bedrock', apiType: 'bedrock', model: maybeWrapModel(provider.languageModel(params.modelId) as any), - embeddingModel: (provider as any).embeddingModel?.(params.modelId), - imageModel: (provider as any).imageModel?.(params.modelId), + embeddingModel: provider.embeddingModel(params.modelId), + imageModel: provider.imageModel(params.modelId), endpoint: bedrockProvider.baseUrl || 'https://bedrock-runtime.amazonaws.com' } } diff --git a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts index b62f3d8c8..91198e955 100644 --- a/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts +++ b/src/main/presenter/llmProviderPresenter/aiSdk/runtime.ts @@ -55,8 +55,8 @@ type 
ImageGenerationRequestOptions = { providerOptions?: Record } -type AiSdkSystemPromptSplit = { - system?: string +type AiSdkPromptSplit = { + instructions?: string messages: ModelMessage[] } @@ -1168,7 +1168,7 @@ async function buildPromptRuntime( return { providerContext, - system: promptSplit.system, + instructions: promptSplit.instructions, messages: promptSplit.messages, providerOptions: providerOptionResult.providerOptions, tools: toolsMap, @@ -1176,7 +1176,7 @@ async function buildPromptRuntime( } } -function splitLeadingSystemMessagesForAiSdk(messages: ModelMessage[]): AiSdkSystemPromptSplit { +function splitLeadingSystemMessagesForAiSdk(messages: ModelMessage[]): AiSdkPromptSplit { const systemContent: string[] = [] let firstConversationIndex = 0 @@ -1194,7 +1194,7 @@ function splitLeadingSystemMessagesForAiSdk(messages: ModelMessage[]): AiSdkSyst } return { - ...(systemContent.length > 0 ? { system: systemContent.join('\n\n') } : {}), + ...(systemContent.length > 0 ? { instructions: systemContent.join('\n\n') } : {}), messages: messages.slice(firstConversationIndex) } } @@ -1254,7 +1254,7 @@ export async function runAiSdkGenerateText( const result = await generateText({ model: runtime.providerContext.model, - ...(runtime.system ? { system: runtime.system } : {}), + ...(runtime.instructions ? { instructions: runtime.instructions } : {}), messages: runtime.messages, allowSystemInMessages: false, providerOptions: runtime.providerOptions as any, @@ -1268,8 +1268,8 @@ export async function runAiSdkGenerateText( return { content: result.text, - reasoning_content: result.reasoningText, - totalUsage: usageToLlmResponse(result.totalUsage) + reasoning_content: result.finalStep.reasoningText, + totalUsage: usageToLlmResponse(result.usage) } } @@ -1461,7 +1461,7 @@ export async function* runAiSdkCoreStream( const result = streamText({ model: runtime.providerContext.model, - ...(runtime.system ? { system: runtime.system } : {}), + ...(runtime.instructions ? 
{ instructions: runtime.instructions } : {}), messages: runtime.messages, allowSystemInMessages: false, tools: runtime.tools, @@ -1474,7 +1474,7 @@ export async function* runAiSdkCoreStream( maxOutputTokens: maxTokens }) - yield* adaptAiSdkStream(result.fullStream, { + yield* adaptAiSdkStream(result.stream, { supportsNativeTools: runtime.supportsNativeTools, cacheImage: (data) => presenter.devicePresenter.cacheImage(data) }) diff --git a/src/main/presenter/llmProviderPresenter/aiSdk/streamAdapter.ts b/src/main/presenter/llmProviderPresenter/aiSdk/streamAdapter.ts index 96437f61f..328484b6d 100644 --- a/src/main/presenter/llmProviderPresenter/aiSdk/streamAdapter.ts +++ b/src/main/presenter/llmProviderPresenter/aiSdk/streamAdapter.ts @@ -74,7 +74,7 @@ export interface AdaptAiSdkStreamOptions { } export async function* adaptAiSdkStream( - fullStream: AsyncIterable>, + stream: AsyncIterable>, options: AdaptAiSdkStreamOptions ): AsyncGenerator { const toolArgumentBuffers = new Map() @@ -144,7 +144,7 @@ export async function* adaptAiSdkStream( } } - for await (const part of fullStream) { + for await (const part of stream) { switch (part.type) { case 'text-delta': { if (options.supportsNativeTools) { diff --git a/src/main/presenter/llmProviderPresenter/aiSdk/toolMapper.ts b/src/main/presenter/llmProviderPresenter/aiSdk/toolMapper.ts index e5cae14fc..dc59e9d29 100644 --- a/src/main/presenter/llmProviderPresenter/aiSdk/toolMapper.ts +++ b/src/main/presenter/llmProviderPresenter/aiSdk/toolMapper.ts @@ -3,6 +3,14 @@ import { jsonSchema, tool, type ToolSet } from 'ai' type JsonSchema = Record const UNSAFE_TOOL_NAMES = new Set(['__proto__', 'constructor', 'prototype']) +const ROOT_SCHEMA_KEYS_TO_DROP = new Set([ + 'anyOf', + 'oneOf', + 'allOf', + '$schema', + '$defs', + 'definitions' +]) function isObjectSchema(value: unknown): value is JsonSchema { return Boolean(value) && typeof value === 'object' && !Array.isArray(value) @@ -39,6 +47,20 @@ function 
unionRequiredKeys(variants: JsonSchema[]): string[] | undefined { return union.length > 0 ? union : undefined } +function collectRequiredKeys(schema: JsonSchema): string[] | undefined { + if (!Array.isArray(schema.required)) { + return undefined + } + + const required = schema.required.filter((key): key is string => typeof key === 'string') + return required.length > 0 ? required : undefined +} + +function mergeRequiredKeys(...requiredLists: Array): string[] | undefined { + const required = Array.from(new Set(requiredLists.flatMap((requiredList) => requiredList ?? []))) + return required.length > 0 ? required : undefined +} + function mergePropertySchemas(existing: unknown, incoming: unknown): unknown { if (!isObjectSchema(existing) || !isObjectSchema(incoming)) { return incoming @@ -88,6 +110,33 @@ function mergeVariantProperties(variants: JsonSchema[]): Record return merged } +function mergeRootAndVariantProperties( + rootProperties: unknown, + variants: JsonSchema[] +): Record | undefined { + const merged: Record = Object.create(null) + + if (isObjectSchema(rootProperties)) { + for (const [key, value] of Object.entries(rootProperties)) { + if (UNSAFE_TOOL_NAMES.has(key)) { + continue + } + + merged[key] = value + } + } + + const variantProperties = mergeVariantProperties(variants) + + if (variantProperties) { + for (const [key, value] of Object.entries(variantProperties)) { + merged[key] = key in merged ? mergePropertySchemas(merged[key], value) : value + } + } + + return Object.keys(merged).length > 0 ? merged : undefined +} + function normalizeSchemaNode(node: unknown): unknown { if (Array.isArray(node)) { return node.map((item) => normalizeSchemaNode(item)) @@ -117,10 +166,6 @@ export function normalizeToolInputSchema(schema: Record): Recor } } - if (normalized.type === 'object') { - return normalized - } - const branchKey = ['anyOf', 'oneOf', 'allOf'].find((key) => Array.isArray(normalized[key])) const variants = branchKey ? 
(normalized[branchKey] as unknown[]) @@ -128,7 +173,27 @@ export function normalizeToolInputSchema(schema: Record): Recor .filter((item) => item.type === 'object') : [] - if (!variants.length) { + if (variants.length) { + const { type: _type, properties: _properties, required: _required, ...rest } = normalized + const sanitizedRest = Object.fromEntries( + Object.entries(rest).filter(([key]) => !ROOT_SCHEMA_KEYS_TO_DROP.has(key)) + ) + const branchRequired = + branchKey === 'allOf' ? unionRequiredKeys(variants) : intersectRequiredKeys(variants) + const required = mergeRequiredKeys(collectRequiredKeys(normalized), branchRequired) + + return { + ...sanitizedRest, + type: 'object', + properties: mergeRootAndVariantProperties(normalized.properties, variants) ?? {}, + ...(required ? { required } : {}), + ...(variants.every((variant) => variant.additionalProperties === false) + ? { additionalProperties: false } + : {}) + } + } + + if (branchKey || normalized.type !== 'object') { const required = Array.isArray(normalized.required) ? normalized.required.filter((key): key is string => typeof key === 'string') : undefined @@ -146,22 +211,7 @@ export function normalizeToolInputSchema(schema: Record): Recor } } - const { type: _type, properties: _properties, required: _required, ...rest } = normalized - const sanitizedRest = Object.fromEntries( - Object.entries(rest).filter(([key]) => !['anyOf', 'oneOf', 'allOf'].includes(key)) - ) - const required = - branchKey === 'allOf' ? unionRequiredKeys(variants) : intersectRequiredKeys(variants) - - return { - ...sanitizedRest, - type: 'object', - properties: mergeVariantProperties(variants) ?? {}, - ...(required ? { required } : {}), - ...(variants.every((variant) => variant.additionalProperties === false) - ? 
{ additionalProperties: false } - : {}) - } + return normalized } export function mcpToolsToAISDKTools(tools: MCPToolDefinition[]): ToolSet { diff --git a/src/main/presenter/llmProviderPresenter/baseProvider.ts b/src/main/presenter/llmProviderPresenter/baseProvider.ts index 2e2f5f519..a45f553d0 100644 --- a/src/main/presenter/llmProviderPresenter/baseProvider.ts +++ b/src/main/presenter/llmProviderPresenter/baseProvider.ts @@ -246,25 +246,10 @@ export abstract class BaseLLMProvider { */ public async fetchModels(options?: { suppressErrors?: boolean }): Promise { const suppressErrors = options?.suppressErrors ?? true + let models: MODEL_META[] + try { - return this.fetchProviderModels().then((models) => { - logger.info( - `[Provider] fetchModels: fetched ${models?.length || 0} models for provider "${this.provider.id}"` - ) - // Validate that all models have correct providerId - const validatedModels = models.map((model) => { - if (model.providerId !== this.provider.id) { - logger.warn( - `[Provider] fetchModels: Model ${model.id} has incorrect providerId: expected "${this.provider.id}", got "${model.providerId}". Fixing it.` - ) - model.providerId = this.provider.id - } - return model - }) - this.models = validatedModels - this.configPresenter.setProviderModels(this.provider.id, validatedModels) - return validatedModels - }) + models = await this.fetchProviderModels() } catch (e) { logger.error( `[Provider] fetchModels: Failed to fetch models for provider "${this.provider.id}":`, @@ -278,6 +263,23 @@ export abstract class BaseLLMProvider { } return [] } + + logger.info( + `[Provider] fetchModels: fetched ${models?.length || 0} models for provider "${this.provider.id}"` + ) + // Validate that all models have correct providerId + const validatedModels = models.map((model) => { + if (model.providerId !== this.provider.id) { + logger.warn( + `[Provider] fetchModels: Model ${model.id} has incorrect providerId: expected "${this.provider.id}", got "${model.providerId}". 
Fixing it.` + ) + model.providerId = this.provider.id + } + return model + }) + this.models = validatedModels + this.configPresenter.setProviderModels(this.provider.id, validatedModels) + return validatedModels } /** diff --git a/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts b/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts index 3ca19d0db..eeb7cdfcc 100644 --- a/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts +++ b/src/main/presenter/llmProviderPresenter/providers/aiSdkProvider.ts @@ -6,6 +6,8 @@ import { isDeepSeekSeriesModelId, isGeminiFamilyModelId, isNewApiEndpointType, + resolveNewApiModelTypeFromMetadata, + resolveNewApiSelectableEndpointTypes, resolveNewApiEndpointTypeFromRoute, resolveProviderCapabilityProviderId, type NewApiEndpointType @@ -220,51 +222,6 @@ const getNewApiPreferredCapabilityProviderIds = ( return providerIds } -const isOpenAiFamilyNewApiModel = (modelId: string, ownedBy?: string): boolean => { - const normalizedOwner = ownedBy?.trim().toLowerCase() ?? 
'' - if (normalizedOwner.includes('openai')) { - return true - } - - const normalizedModelId = modelId.trim().toLowerCase() - return /^(?:gpt-|chatgpt-|o[1-9](?:[.-]|$))/.test(normalizedModelId) -} - -const isNewApiResponsesIncompatibleModelId = (modelId: string): boolean => { - const normalizedModelId = modelId.trim().toLowerCase() - return ( - normalizedModelId.startsWith('tts-') || - normalizedModelId.startsWith('whisper-') || - normalizedModelId.startsWith('audio-') || - normalizedModelId.includes('speech') || - normalizedModelId.includes('transcribe') - ) -} - -const resolveNewApiSelectableEndpointTypes = ( - supportedEndpointTypes: NewApiEndpointType[], - modelId: string, - normalizedRawType: string, - ownedBy?: string -): NewApiEndpointType[] | undefined => { - if ( - !supportedEndpointTypes.includes('openai') || - supportedEndpointTypes.includes('openai-response') || - normalizedRawType !== ModelType.Chat || - !isOpenAiFamilyNewApiModel(modelId, ownedBy) || - isNewApiResponsesIncompatibleModelId(modelId) - ) { - return undefined - } - - const openaiIndex = supportedEndpointTypes.indexOf('openai') - return [ - ...supportedEndpointTypes.slice(0, openaiIndex + 1), - 'openai-response', - ...supportedEndpointTypes.slice(openaiIndex + 1) - ] -} - export function normalizeExtractedImageText(content: string): string { const normalized = content .replace(/\r\n/g, '\n') @@ -1949,35 +1906,19 @@ export class AiSdkProvider extends BaseLLMProvider { const normalizedRawType = typeof rawModel.type === 'string' ? rawModel.type.trim().toLowerCase() : '' - const normalizedModelId = rawModel.id.toLowerCase() - const type = - normalizedRawType === 'imagegeneration' || - normalizedRawType === 'image-generation' || - normalizedRawType === 'image' || - rawSupportedEndpointTypes.includes('image-generation') - ? 
ModelType.ImageGeneration - : normalizedRawType === 'videogeneration' || - normalizedRawType === 'video-generation' || - normalizedRawType === 'video' || - rawSupportedEndpointTypes.includes('video-generation') - ? ModelType.VideoGeneration - : normalizedRawType === 'tts' || - normalizedRawType === 'audio-speech' || - normalizedRawType === 'audiospeech' - ? ModelType.TTS - : normalizedRawType === 'embedding' || - normalizedRawType === 'embeddings' || - normalizedModelId.includes('embedding') - ? ModelType.Embedding - : normalizedRawType === 'rerank' || normalizedModelId.includes('rerank') - ? ModelType.Rerank - : undefined + const type = resolveNewApiModelTypeFromMetadata( + rawSupportedEndpointTypes, + rawModel.id, + normalizedRawType + ) const supportedEndpointTypes = rawSupportedEndpointTypes const selectableEndpointTypes = resolveNewApiSelectableEndpointTypes( rawSupportedEndpointTypes, rawModel.id, - normalizedRawType, - ownedBy + { + type, + rawType: normalizedRawType + } ) const contextLengthCandidate = [ diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts index 2887003d0..4564b8ec3 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/appleServer.ts @@ -3,7 +3,7 @@ import logger from '@shared/logger' import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { z } from 'zod' import { runAppleScript } from 'run-applescript' import { run } from '@jxa/run' @@ -1210,7 +1210,7 @@ export class AppleServer { { name: 'calendar', description: 'Search, create, and open calendar events in Apple Calendar 
app', - inputSchema: zodToJsonSchema(CalendarArgsSchema), + inputSchema: toDeepChatJsonSchema(CalendarArgsSchema), annotations: { title: 'Apple Calendar', destructiveHint: false @@ -1219,7 +1219,7 @@ export class AppleServer { { name: 'contacts', description: 'Search and retrieve contacts from Apple Contacts app', - inputSchema: zodToJsonSchema(ContactsArgsSchema), + inputSchema: toDeepChatJsonSchema(ContactsArgsSchema), annotations: { title: 'Apple Contacts', readOnlyHint: true @@ -1229,7 +1229,7 @@ export class AppleServer { name: 'mail', description: 'Interact with Apple Mail app - read unread emails, search emails, and send emails', - inputSchema: zodToJsonSchema(MailArgsSchema), + inputSchema: toDeepChatJsonSchema(MailArgsSchema), annotations: { title: 'Apple Mail', destructiveHint: false, @@ -1240,7 +1240,7 @@ export class AppleServer { name: 'maps', description: 'Search locations, manage guides, save favorites, and get directions using Apple Maps', - inputSchema: zodToJsonSchema(MapsArgsSchema), + inputSchema: toDeepChatJsonSchema(MapsArgsSchema), annotations: { title: 'Apple Maps', destructiveHint: false @@ -1250,7 +1250,7 @@ export class AppleServer { name: 'messages', description: 'Interact with Apple Messages app - send, read, schedule messages and check unread messages', - inputSchema: zodToJsonSchema(MessagesArgsSchema), + inputSchema: toDeepChatJsonSchema(MessagesArgsSchema), annotations: { title: 'Apple Messages', destructiveHint: false, @@ -1260,7 +1260,7 @@ export class AppleServer { { name: 'notes', description: 'Search, retrieve and create notes in Apple Notes app', - inputSchema: zodToJsonSchema(NotesArgsSchema), + inputSchema: toDeepChatJsonSchema(NotesArgsSchema), annotations: { title: 'Apple Notes', destructiveHint: false @@ -1269,7 +1269,7 @@ export class AppleServer { { name: 'reminders', description: 'Search, create, and open reminders in Apple Reminders app', - inputSchema: zodToJsonSchema(RemindersArgsSchema), + inputSchema: 
toDeepChatJsonSchema(RemindersArgsSchema), annotations: { title: 'Apple Reminders', destructiveHint: false diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/artifactsServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/artifactsServer.ts index eae8d4cc1..478190642 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/artifactsServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/artifactsServer.ts @@ -1,7 +1,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' // Artifacts 相关的常量定义 @@ -580,7 +580,7 @@ export class ArtifactsServer { 'Do not call this function repeatedly if instructions or definitions for the requested artifact type are already available in the current context. ' + 'Specify the desired artifact category through the type parameter: code, documents, html, svg, mermaid, or react. 
' + 'After obtaining the instructions, use them appropriately and avoid duplicate calls for the same type.', - inputSchema: zodToJsonSchema(GetArtifactInstructionsArgsSchema), + inputSchema: toDeepChatJsonSchema(GetArtifactInstructionsArgsSchema), annotations: { title: 'Get Artifact Instructions', readOnlyHint: true diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/autoPromptingServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/autoPromptingServer.ts index 390ec8a66..2eb9b65dc 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/autoPromptingServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/autoPromptingServer.ts @@ -6,7 +6,7 @@ import { } from '@modelcontextprotocol/sdk/types.js' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { presenter } from '@/presenter' import { Prompt } from '@shared/presenter' import { isSafeRegexPattern } from '@shared/regexValidator' @@ -47,8 +47,8 @@ const FillTemplateArgsSchema = z.object({ }) // Zod Schema 转换为 JSON Schema -const GetTemplateParametersArgsJsonSchema = zodToJsonSchema(GetTemplateParametersArgsSchema) -const FillTemplateArgsJsonSchema = zodToJsonSchema(FillTemplateArgsSchema) +const GetTemplateParametersArgsJsonSchema = toDeepChatJsonSchema(GetTemplateParametersArgsSchema) +const FillTemplateArgsJsonSchema = toDeepChatJsonSchema(FillTemplateArgsSchema) // --- MCP Server 实现 --- export class AutoPromptingServer { @@ -111,7 +111,7 @@ export class AutoPromptingServer { { name: 'list_all_prompt_template_names', description: '获取所有可用提示词模板的名称列表。', - inputSchema: zodToJsonSchema(z.object({})), // 无需参数 + inputSchema: toDeepChatJsonSchema(z.object({})), // 无需参数 annotations: { title: 'List Prompt Template Names', readOnlyHint: true @@ -160,7 +160,7 @@ export class AutoPromptingServer { const parsed = 
GetTemplateParametersArgsSchema.safeParse(args) if (!parsed.success) { throw new Error( - `Invalid parameters for get_prompt_template_parameters: ${parsed.error.errors.map((e) => e.message).join(', ')}` + `Invalid parameters for get_prompt_template_parameters: ${parsed.error.issues.map((e) => e.message).join(', ')}` ) } @@ -180,7 +180,7 @@ export class AutoPromptingServer { const parsed = FillTemplateArgsSchema.safeParse(args) if (!parsed.success) { throw new Error( - `Invalid parameters for fill_prompt_template: ${parsed.error.errors.map((e) => e.message).join(', ')}` + `Invalid parameters for fill_prompt_template: ${parsed.error.issues.map((e) => e.message).join(', ')}` ) } diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/bochaSearchServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/bochaSearchServer.ts index 2bedf540e..5e7288181 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/bochaSearchServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/bochaSearchServer.ts @@ -1,7 +1,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import axios from 'axios' @@ -132,7 +132,7 @@ export class BochaSearchServer { name: 'bocha_web_search', description: 'Search with Bocha Web Search and get enhanced search details from billions of web documents, including page titles, urls, summaries, site names, site icons, publication dates, image links, and more.', // 官方描述 - inputSchema: zodToJsonSchema(BochaWebSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(BochaWebSearchArgsSchema), annotations: { title: 'Bocha Web Search', readOnlyHint: true, @@ -143,7 +143,7 @@ export class BochaSearchServer { name: 
'bocha_ai_search', description: 'Search with Bocha AI Search, recognizes the semantics of search terms and additionally returns structured modal cards with content from vertical domains.', // 官方描述 - inputSchema: zodToJsonSchema(BochaAiSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(BochaAiSearchArgsSchema), annotations: { title: 'Bocha AI Search', readOnlyHint: true, diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/braveSearchServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/braveSearchServer.ts index d25b45d2a..28bd2797c 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/braveSearchServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/braveSearchServer.ts @@ -1,7 +1,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import axios from 'axios' @@ -313,7 +313,7 @@ export class BraveSearchServer { 'Use this for broad information gathering, recent events, or when you need diverse web sources. ' + 'Supports pagination, content filtering, and freshness controls. ' + 'Maximum 20 results per request, with offset for pagination. ', - inputSchema: zodToJsonSchema(BraveWebSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(BraveWebSearchArgsSchema), annotations: { title: 'Brave Web Search', readOnlyHint: true, @@ -331,7 +331,7 @@ export class BraveSearchServer { '- Phone numbers and opening hours\n' + "Use this when the query implies 'near me' or mentions specific locations. 
" + 'Automatically falls back to web search if no local results are found.', - inputSchema: zodToJsonSchema(BraveLocalSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(BraveLocalSearchArgsSchema), annotations: { title: 'Brave Local Search', readOnlyHint: true, diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/builtinKnowledgeServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/builtinKnowledgeServer.ts index 722365a4d..e2d9d2386 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/builtinKnowledgeServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/builtinKnowledgeServer.ts @@ -1,7 +1,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import { BuiltinKnowledgeConfig, MCPTextContent, QueryResult } from '@shared/presenter' import { presenter } from '@/presenter' @@ -42,7 +42,7 @@ export class BuiltinKnowledgeServer { return { name: `builtin_knowledge_search${suffix}`, description: config.description, - inputSchema: zodToJsonSchema(BuiltinKnowledgeSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(BuiltinKnowledgeSearchArgsSchema), annotations: { title: 'Builtin Knowledge Search', readOnlyHint: true diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/conversationSearchServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/conversationSearchServer.ts index 74306ec15..7a31b1c4f 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/conversationSearchServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/conversationSearchServer.ts @@ -2,7 +2,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, 
ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import { presenter } from '@/presenter' // 导入全局的 presenter 对象 import { isSafeRegexPattern } from '@shared/regexValidator' @@ -457,7 +457,7 @@ export class ConversationSearchServer { name: 'search_conversations', description: 'Search historical conversation records, supports title and content search', - inputSchema: zodToJsonSchema(SearchConversationsArgsSchema), + inputSchema: toDeepChatJsonSchema(SearchConversationsArgsSchema), annotations: { title: 'Search Conversations', readOnlyHint: true @@ -467,7 +467,7 @@ export class ConversationSearchServer { name: 'search_messages', description: 'Search historical message records, supports filtering by conversation ID, role and other conditions', - inputSchema: zodToJsonSchema(SearchMessagesArgsSchema), + inputSchema: toDeepChatJsonSchema(SearchMessagesArgsSchema), annotations: { title: 'Search Messages', readOnlyHint: true @@ -476,7 +476,7 @@ export class ConversationSearchServer { { name: 'get_conversation_history', description: 'Get complete history of a specific conversation', - inputSchema: zodToJsonSchema(GetConversationHistoryArgsSchema), + inputSchema: toDeepChatJsonSchema(GetConversationHistoryArgsSchema), annotations: { title: 'Get Conversation History', readOnlyHint: true @@ -485,7 +485,7 @@ export class ConversationSearchServer { { name: 'get_conversation_stats', description: 'Get conversation statistics including totals, recent activity and more', - inputSchema: zodToJsonSchema(GetConversationStatsArgsSchema), + inputSchema: toDeepChatJsonSchema(GetConversationStatsArgsSchema), annotations: { title: 'Get Conversation Stats', readOnlyHint: true diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/deepResearchServer.ts 
b/src/main/presenter/mcpPresenter/inMemoryServers/deepResearchServer.ts index 43b829a5f..10e92056b 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/deepResearchServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/deepResearchServer.ts @@ -6,7 +6,7 @@ import logger from '@shared/logger' import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import axios from 'axios' import { presenter } from '@/presenter' @@ -284,7 +284,7 @@ export class DeepResearchServer { { name: 'start_deep_research', description: '启动一个新的深度研究会话。返回 session_id 用于后续操作。', - inputSchema: zodToJsonSchema(StartDeepResearchArgsSchema), + inputSchema: toDeepChatJsonSchema(StartDeepResearchArgsSchema), annotations: { title: 'Start Deep Research', destructiveHint: false @@ -293,7 +293,7 @@ export class DeepResearchServer { { name: 'execute_single_web_search', description: '在研究会话内执行一次网页搜索。', - inputSchema: zodToJsonSchema(SingleWebSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(SingleWebSearchArgsSchema), annotations: { title: 'Execute Web Search', readOnlyHint: false, @@ -303,7 +303,7 @@ export class DeepResearchServer { { name: 'request_research_data', description: '请求当前会话中新增的搜索结果和研究背景,供 LLM 反思。', - inputSchema: zodToJsonSchema(RequestResearchDataArgsSchema), + inputSchema: toDeepChatJsonSchema(RequestResearchDataArgsSchema), annotations: { title: 'Request Research Data', readOnlyHint: true @@ -312,7 +312,7 @@ export class DeepResearchServer { { name: 'submit_reflection_results', description: 'LLM 提交其对研究数据的反思结果(如是否需更多研究、建议查询等)。', - inputSchema: zodToJsonSchema(SubmitReflectionResultsArgsSchema), + inputSchema: 
toDeepChatJsonSchema(SubmitReflectionResultsArgsSchema), annotations: { title: 'Submit Reflection Results', destructiveHint: false @@ -321,7 +321,7 @@ export class DeepResearchServer { { name: 'generate_final_answer', description: '根据累积研究生成最终答案,并清理会话数据。', - inputSchema: zodToJsonSchema(GenerateFinalAnswerArgsSchema), + inputSchema: toDeepChatJsonSchema(GenerateFinalAnswerArgsSchema), annotations: { title: 'Generate Final Answer', destructiveHint: true diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/difyKnowledgeServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/difyKnowledgeServer.ts index 6f77d6c27..1cce89f9d 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/difyKnowledgeServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/difyKnowledgeServer.ts @@ -2,7 +2,7 @@ import logger from '@shared/logger' import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import axios from 'axios' @@ -130,7 +130,7 @@ export class DifyKnowledgeServer { return { name: `dify_knowledge_search${suffix}`, description: config.description, - inputSchema: zodToJsonSchema(DifyKnowledgeSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(DifyKnowledgeSearchArgsSchema), annotations: { title: 'Dify Knowledge Search', readOnlyHint: true, diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/fastGptKnowledgeServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/fastGptKnowledgeServer.ts index 972706378..b532b0e0d 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/fastGptKnowledgeServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/fastGptKnowledgeServer.ts @@ -1,7 +1,7 @@ import { Server } from 
'@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import axios from 'axios' @@ -118,7 +118,7 @@ export class FastGptKnowledgeServer { return { name: `fastgpt_knowledge_search${suffix}`, description: config.description, - inputSchema: zodToJsonSchema(FastGptKnowledgeSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(FastGptKnowledgeSearchArgsSchema), annotations: { title: 'FastGPT Knowledge Search', readOnlyHint: true, diff --git a/src/main/presenter/mcpPresenter/inMemoryServers/ragflowKnowledgeServer.ts b/src/main/presenter/mcpPresenter/inMemoryServers/ragflowKnowledgeServer.ts index c0bda1c09..3d20f754a 100644 --- a/src/main/presenter/mcpPresenter/inMemoryServers/ragflowKnowledgeServer.ts +++ b/src/main/presenter/mcpPresenter/inMemoryServers/ragflowKnowledgeServer.ts @@ -2,7 +2,7 @@ import logger from '@shared/logger' import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' import axios from 'axios' @@ -129,7 +129,7 @@ export class RagflowKnowledgeServer { return { name: `ragflow_knowledge_search${suffix}`, description: config.description, - inputSchema: zodToJsonSchema(RagflowKnowledgeSearchArgsSchema), + inputSchema: toDeepChatJsonSchema(RagflowKnowledgeSearchArgsSchema), annotations: { title: 'RAGFlow Knowledge Search', readOnlyHint: true, diff --git a/src/main/presenter/memoryPresenter/index.ts 
b/src/main/presenter/memoryPresenter/index.ts index f8a7e286c..813b8ea22 100644 --- a/src/main/presenter/memoryPresenter/index.ts +++ b/src/main/presenter/memoryPresenter/index.ts @@ -107,7 +107,11 @@ const CONSOLIDATION_MAX_LLM_CALLS = 8 const CONSOLIDATION_MAX_INPUT_TOKENS = 24000 const CONSOLIDATION_MERGE_SIMILARITY = 0.85 const MAINTENANCE_START_DELAY_MS = 60 * 1000 +const STARTUP_PREWARM_DELAY_MS = 3 * 1000 const STARTUP_ARM_STAGGER_MS = 5 * 1000 +const STARTUP_PREWARM_STAGGER_MS = 1500 +const EMBEDDING_PREWARM_TEXT = 'memory warmup' +const WARM_DIMENSION_FAILURE_COOLDOWN_MS = 30 * 1000 const ARCHIVE_DECAY_THRESHOLD = 0.05 const ARCHIVE_AGE_MS = 90 * 24 * 60 * 60 * 1000 @@ -226,7 +230,11 @@ export class MemoryPresenter implements MemoryRuntimePort { // by two DuckDBInstances at once. private readonly vectorStores = new Map>() private readonly vectorStoreIdentities = new Map() + private readonly vectorStoreReady = new Map() + private readonly vectorStoreWarmups = new Map>() + private readonly vectorStoreDimensionFailures = new Map() private readonly vectorStoreLocks = new Map>() + private readonly embeddingWarmups = new Map>() // Serializes an agent's embedding drains. Distinct from vectorStoreLocks on purpose: this one // spans the network embedding call, the file lock must not. private readonly embeddingDrains = new Map>() @@ -242,6 +250,8 @@ export class MemoryPresenter implements MemoryRuntimePort { // In-flight timer-fired consolidation passes; dispose() awaits them so none writes after teardown. private readonly consolidationRuns = new Set>() private maintenanceStartTimer: NodeJS.Timeout | null = null + private prewarmStartTimer: NodeJS.Timeout | null = null + private readonly prewarmTimers = new Map() private maintenanceStarted = false // Per-agent watermark for a reflection attempt that ran the model but wrote nothing new (empty or // all-duplicate output). 
Lets a quiet agent stop re-spending the model on the same units until @@ -266,6 +276,12 @@ export class MemoryPresenter implements MemoryRuntimePort { startBackgroundMaintenance(): void { if (this.disposed || this.maintenanceStarted) return this.maintenanceStarted = true + this.prewarmStartTimer = setTimeout(() => { + this.prewarmStartTimer = null + if (this.disposed) return + this.warmActiveAgents() + }, STARTUP_PREWARM_DELAY_MS) + if (typeof this.prewarmStartTimer.unref === 'function') this.prewarmStartTimer.unref() this.maintenanceStartTimer = setTimeout(() => { this.maintenanceStartTimer = null if (this.disposed) return @@ -275,6 +291,12 @@ export class MemoryPresenter implements MemoryRuntimePort { } stopBackgroundMaintenance(): void { + if (this.prewarmStartTimer) { + clearTimeout(this.prewarmStartTimer) + this.prewarmStartTimer = null + } + for (const timer of this.prewarmTimers.values()) clearTimeout(timer) + this.prewarmTimers.clear() if (this.maintenanceStartTimer) { clearTimeout(this.maintenanceStartTimer) this.maintenanceStartTimer = null @@ -293,6 +315,15 @@ export class MemoryPresenter implements MemoryRuntimePort { } } + warmActiveAgents(): void { + if (this.disposed) return + try { + this.warmActiveAgentsStaggered(this.deps.repository.listAgentIdsWithMemories()) + } catch (error) { + logger.warn(`[Memory] startup prewarm skipped: ${String(error)}`) + } + } + private armActiveAgentsStaggered(agentIds: string[]): void { if (this.disposed) return agentIds @@ -306,6 +337,39 @@ export class MemoryPresenter implements MemoryRuntimePort { }) } + private warmActiveAgentsStaggered(agentIds: string[]): void { + if (this.disposed) return + agentIds + .filter((agentId) => this.shouldArmMaintenance(agentId)) + .sort() + .forEach((agentId, index) => { + this.clearPrewarmTimer(agentId) + const timer = setTimeout(() => { + if (this.prewarmTimers.get(agentId) === timer) this.prewarmTimers.delete(agentId) + if (this.disposed || !this.canReadAgentMemory(agentId)) 
return + const embedding = this.deps.resolveAgentConfig(agentId)?.memoryEmbedding + if (!embedding?.providerId || !embedding?.modelId) return + void this.warmVectorStore(agentId, { + providerId: embedding.providerId, + modelId: embedding.modelId + }) + this.warmEmbeddingConnection(agentId, { + providerId: embedding.providerId, + modelId: embedding.modelId + }) + }, index * STARTUP_PREWARM_STAGGER_MS) + this.prewarmTimers.set(agentId, timer) + if (typeof timer.unref === 'function') timer.unref() + }) + } + + private clearPrewarmTimer(agentId: string): void { + const timer = this.prewarmTimers.get(agentId) + if (!timer) return + clearTimeout(timer) + this.prewarmTimers.delete(agentId) + } + isEnabled(agentId: string): boolean { return this.deps.resolveAgentConfig(agentId)?.memoryEnabled === true } @@ -562,6 +626,15 @@ export class MemoryPresenter implements MemoryRuntimePort { this.deps.repository.updatePendingEmbeddingStatus(agentId, record.memoryId, 'error') } } + if (!outcome.usable) { + this.clearVectorStoreReady(agentId) + } else if (outcome.written.size > 0 && !this.hasStaleEmbeddings(agentId, dim, fingerprint)) { + this.markVectorStoreReady( + agentId, + { providerId: embedding.providerId, modelId: embedding.modelId }, + dim + ) + } } catch (error) { // Embeddings succeeded but the vector store write failed: terminal for this batch. logger.error(`[Memory] vector store write failed for ${agentId}: ${String(error)}`) @@ -578,6 +651,7 @@ export class MemoryPresenter implements MemoryRuntimePort { // callers onto one run; never throws (the chat path must not be blocked). 
reindexEmbeddings(agentId: string, force = false): Promise { if (this.disposed) return Promise.resolve() + this.clearVectorStoreReady(agentId) const inflight = this.reindexing.get(agentId) if (inflight) return inflight const tracked = this.runReindex(agentId, force).finally(() => { @@ -754,6 +828,9 @@ export class MemoryPresenter implements MemoryRuntimePort { let neighbors: MemoryRecallItem[] = [] try { + // Cold vector stores deliberately degrade this neighbor lookup to FTS-only. Exact provenance + // dedupe already ran above, and semantic merging can recover once the store warms; blocking a + // write here would reintroduce the same first-turn DuckDB cold-start stall. const hits = await this.retrieve(agentId, content, now, false) neighbors = hits.slice(0, DECISION_NEIGHBOR_TOP_S) } catch (error) { @@ -1143,6 +1220,15 @@ export class MemoryPresenter implements MemoryRuntimePort { now: number, model: { providerId: string; modelId: string } ): Promise { + const embedding = this.deps.resolveAgentConfig(agentId)?.memoryEmbedding + if (embedding?.providerId && embedding?.modelId) { + await this.warmVectorStore(agentId, { + providerId: embedding.providerId, + modelId: embedding.modelId + }) + if (!this.canWriteAgentMemory(agentId)) return false + } + const active = this.deps.repository .listByAgent(agentId) .filter((row) => row.kind !== 'persona') @@ -1547,6 +1633,8 @@ export class MemoryPresenter implements MemoryRuntimePort { options: { limit?: number } = {} ): Promise { if (!this.canReadAgentMemory(agentId)) return [] + // Management search follows recall's cold-store contract: return FTS-only hits immediately and + // let the background warm restore vector results on the next query. const hits = await this.retrieve(agentId, query, Date.now(), false) const limited = options.limit != null ? 
hits.slice(0, Math.max(0, Math.floor(options.limit))) : hits @@ -1641,78 +1729,84 @@ export class MemoryPresenter implements MemoryRuntimePort { const vecMatches: { row: AgentMemoryRow; similarity: number }[] = [] const embedding = config?.memoryEmbedding if (embedding?.providerId && embedding?.modelId) { - try { - const vectors = await this.deps.getEmbeddings(embedding.providerId, embedding.modelId, [ - normalizedQuery - ]) - // Teardown may have started during the embedding await: bail before opening the store so a - // late recall cannot reopen a sidecar the dispose close-loop has already passed. - if (!this.canReadAgentMemory(agentId)) return [] - const vector = vectors[0] - if (vector?.length) { - const fingerprint = embeddingFingerprint(embedding.providerId, embedding.modelId) - if (this.hasStaleEmbeddings(agentId, vector.length, fingerprint)) { - // The embedding model or dimension changed: rebuild vectors in the background and - // answer from FTS this turn instead of querying a store with stale dimensions. Skipped - // during teardown so no background write outlives the database connection. - if (this.canReadAgentMemory(agentId)) { - void this.reindexEmbeddings(agentId).catch((error) => { - logger.warn(`[Memory] reindex failed for ${agentId}: ${String(error)}`) - }) - } - } else { - const store = await this.getVectorStore( - agentId, - { providerId: embedding.providerId, modelId: embedding.modelId }, - vector.length - ) - // Teardown may have begun while the store opened: bail before querying or reading rows. - // dispose awaits the per-agent open lock, so the store this call cached is closed there. - if (!this.canReadAgentMemory(agentId)) return [] - if (store.isUsable()) { - const matches = await store.query(vector, { topK: candidateLimit }) - // ...and again after the query await, before any repository.getById on a closing DB. 
- if (!this.canReadAgentMemory(agentId)) return [] - for (const match of matches) { - const similarity = distanceToSimilarity(match.distance) - if (similarity < similarityThreshold) continue - const row = this.deps.repository.getById(match.memoryId) - // Skip persona even if an old/anomalous vector for it sits in the store: the - // self-model is injected separately, never recalled as a normal memory. working - // rows are never embedded, but skip them defensively too. Archived rows keep their - // vector but must stay out of recall until restored. - if ( - !row || - row.superseded_by || - row.kind === 'persona' || - row.kind === 'working' || - row.status === 'archived' || - row.status === 'conflicted' - ) - continue - vecMatches.push({ row, similarity }) + const currentEmbedding = { providerId: embedding.providerId, modelId: embedding.modelId } + if (!this.isVectorStoreWarm(agentId, currentEmbedding)) { + void this.warmVectorStore(agentId, currentEmbedding) + this.warmEmbeddingConnection(agentId, currentEmbedding) + } else { + try { + const vectors = await this.deps.getEmbeddings(embedding.providerId, embedding.modelId, [ + normalizedQuery + ]) + // Teardown may have started during the embedding await: bail before opening the store so a + // late recall cannot reopen a sidecar the dispose close-loop has already passed. + if (!this.canReadAgentMemory(agentId)) return [] + const vector = vectors[0] + if (vector?.length) { + const fingerprint = embeddingFingerprint(embedding.providerId, embedding.modelId) + if (this.hasStaleEmbeddings(agentId, vector.length, fingerprint)) { + this.clearVectorStoreReady(agentId) + // The embedding model or dimension changed: rebuild vectors in the background and + // answer from FTS this turn instead of querying a store with stale dimensions. Skipped + // during teardown so no background write outlives the database connection. 
+ if (this.canReadAgentMemory(agentId)) { + void this.reindexEmbeddings(agentId).catch((error) => { + logger.warn(`[Memory] reindex failed for ${agentId}: ${String(error)}`) + }) } - // The service embedded the query and the store is healthy: opportunistically embed - // rows deferred as fts_only (config added later) and re-drain any an earlier run left - // pending. Background, coalesced, and skipped while a reindex owns the requeue. - if (this.canReadAgentMemory(agentId) && !this.reindexing.has(agentId)) { - void this.backfillEmbeddings(agentId).catch((error) => { - logger.warn(`[Memory] backfill failed for ${agentId}: ${String(error)}`) + } else { + const store = await this.getVectorStore(agentId, currentEmbedding, vector.length) + // Teardown may have begun while the store opened: bail before querying or reading rows. + // dispose awaits the per-agent open lock, so the store this call cached is closed there. + if (!this.canReadAgentMemory(agentId)) return [] + if (store.isUsable()) { + this.markVectorStoreReady(agentId, currentEmbedding, vector.length) + const matches = await store.query(vector, { topK: candidateLimit }) + // ...and again after the query await, before any repository.getById on a closing DB. + if (!this.canReadAgentMemory(agentId)) return [] + for (const match of matches) { + const similarity = distanceToSimilarity(match.distance) + if (similarity < similarityThreshold) continue + const row = this.deps.repository.getById(match.memoryId) + // Skip persona even if an old/anomalous vector for it sits in the store: the + // self-model is injected separately, never recalled as a normal memory. working + // rows are never embedded, but skip them defensively too. Archived rows keep their + // vector but must stay out of recall until restored. 
+ if ( + !row || + row.superseded_by || + row.kind === 'persona' || + row.kind === 'working' || + row.status === 'archived' || + row.status === 'conflicted' + ) + continue + vecMatches.push({ row, similarity }) + } + // The service embedded the query and the store is healthy: opportunistically embed + // rows deferred as fts_only (config added later) and re-drain any an earlier run left + // pending. Background, coalesced, and skipped while a reindex owns the requeue. + if (this.canReadAgentMemory(agentId) && !this.reindexing.has(agentId)) { + void this.backfillEmbeddings(agentId).catch((error) => { + logger.warn(`[Memory] backfill failed for ${agentId}: ${String(error)}`) + }) + } + } else if (this.canReadAgentMemory(agentId) && !this.reindexing.has(agentId)) { + this.clearVectorStoreReady(agentId) + // The on-disk sidecar carries a foreign/legacy identity we can never query (and there + // were no embedded rows to flag it as stale). Rebuild it under the current identity so + // the corpus stops failing closed; force the reset even if there is nothing to + // re-queue, since the unusable file itself is what blocks recovery. + void this.reindexEmbeddings(agentId, true).catch((error) => { + logger.warn(`[Memory] store rebuild failed for ${agentId}: ${String(error)}`) }) } - } else if (this.canReadAgentMemory(agentId) && !this.reindexing.has(agentId)) { - // The on-disk sidecar carries a foreign/legacy identity we can never query (and there - // were no embedded rows to flag it as stale). Rebuild it under the current identity so - // the corpus stops failing closed; force the reset even if there is nothing to - // re-queue, since the unusable file itself is what blocks recovery. 
- void this.reindexEmbeddings(agentId, true).catch((error) => { - logger.warn(`[Memory] store rebuild failed for ${agentId}: ${String(error)}`) - }) } } + } catch (error) { + this.clearVectorStoreReady(agentId) + logger.warn(`[Memory] vector recall degraded to FTS for ${agentId}: ${String(error)}`) } - } catch (error) { - logger.warn(`[Memory] vector recall degraded to FTS for ${agentId}: ${String(error)}`) } } @@ -2238,6 +2332,7 @@ export class MemoryPresenter implements MemoryRuntimePort { async cleanupDeletedAgentResources(agentId: string): Promise { if (this.disposed) return this.assertSafeAgentId(agentId) + this.clearPrewarmTimer(agentId) let resetError: unknown try { await this.runExclusiveForAgent(agentId, async () => { @@ -2281,10 +2376,22 @@ export class MemoryPresenter implements MemoryRuntimePort { const embeddingDrain = this.embeddingDrains.get(agentId) const vectorStoreLock = this.vectorStoreLocks.get(agentId) const personaLock = this.personaLocks.get(agentId) + const vectorWarmups = [...this.vectorStoreWarmups.entries()].filter(([key]) => + key.startsWith(`${agentId}::`) + ) + const embeddingWarmups = [...this.embeddingWarmups.entries()].filter(([key]) => + key.startsWith(`${agentId}::`) + ) await Promise.allSettled( - [reindexing, backfilling, embeddingDrain, vectorStoreLock, personaLock].filter( - (promise): promise is Promise => Boolean(promise) - ) + [ + reindexing, + backfilling, + embeddingDrain, + vectorStoreLock, + personaLock, + ...vectorWarmups.map(([, promise]) => promise), + ...embeddingWarmups.map(([, promise]) => promise) + ].filter((promise): promise is Promise => Boolean(promise)) ) if (this.reindexing.get(agentId) === reindexing) this.reindexing.delete(agentId) if (this.backfilling.get(agentId) === backfilling) this.backfilling.delete(agentId) @@ -2292,6 +2399,16 @@ export class MemoryPresenter implements MemoryRuntimePort { if (this.vectorStoreLocks.get(agentId) === vectorStoreLock) this.vectorStoreLocks.delete(agentId) if 
(this.personaLocks.get(agentId) === personaLock) this.personaLocks.delete(agentId) + for (const [key, promise] of vectorWarmups) { + if (this.vectorStoreWarmups.get(key) === promise) this.vectorStoreWarmups.delete(key) + } + for (const [key, promise] of embeddingWarmups) { + if (this.embeddingWarmups.get(key) === promise) this.embeddingWarmups.delete(key) + } + for (const key of this.vectorStoreDimensionFailures.keys()) { + if (key.startsWith(`${agentId}::`)) this.vectorStoreDimensionFailures.delete(key) + } + this.vectorStoreReady.delete(agentId) } getStatus(agentId: string): MemoryStatus { @@ -2317,15 +2434,17 @@ export class MemoryPresenter implements MemoryRuntimePort { this.consolidationTimerDueAt.clear() this.lastConsolidationAt.clear() // Drain every background writer started before teardown so none touches the database after it - // closes: consolidation passes, plus the reindex/backfill/embedding drains a pass's retrieve() - // may have fired. `disposed` blocks new ones, so the in-flight set shrinks to empty. Bounded in - // case a drain keeps spawning follow-up work. + // closes: consolidation passes, plus the reindex/backfill/embedding drains and prewarm calls a + // pass's retrieve() may have fired. `disposed` blocks new ones, so the in-flight set shrinks to + // empty. Bounded in case a drain keeps spawning follow-up work. 
for (let i = 0; i < REINDEX_MAX_BATCHES; i += 1) { const inflight = [ ...this.consolidationRuns, ...this.reindexing.values(), ...this.backfilling.values(), - ...this.embeddingDrains.values() + ...this.embeddingDrains.values(), + ...this.vectorStoreWarmups.values(), + ...this.embeddingWarmups.values() ] if (!inflight.length) break await Promise.allSettled(inflight) @@ -2342,6 +2461,10 @@ export class MemoryPresenter implements MemoryRuntimePort { } this.vectorStores.clear() this.vectorStoreIdentities.clear() + this.vectorStoreReady.clear() + this.vectorStoreWarmups.clear() + this.vectorStoreDimensionFailures.clear() + this.embeddingWarmups.clear() this.vectorStoreLocks.clear() } @@ -2365,13 +2488,19 @@ export class MemoryPresenter implements MemoryRuntimePort { // ignored: they are never meant to be vectors, so an anomalous embedded persona (buggy/manual // data) must not be read as "stale" and drive a reindex on every recall. private hasStaleEmbeddings(agentId: string, currentDim: number, fingerprint: string): boolean { - return this.deps.repository - .listByAgent(agentId, { statuses: ['embedded'] }) - .some( - (row) => - row.kind !== 'persona' && - (row.embedding_dim !== currentDim || row.embedding_model !== fingerprint) - ) + return this.deps.repository.hasStaleEmbeddings(agentId, currentDim, fingerprint) + } + + private canUseCurrentMemoryEmbedding( + agentId: string, + embedding: { providerId: string; modelId: string } + ): boolean { + const current = this.deps.resolveAgentConfig(agentId)?.memoryEmbedding + return ( + current?.providerId === embedding.providerId && + current?.modelId === embedding.modelId && + this.canReadAgentMemory(agentId) + ) } private vectorStoreCacheKey( @@ -2382,6 +2511,165 @@ export class MemoryPresenter implements MemoryRuntimePort { return `${agentId}::${embedding.providerId}::${embedding.modelId}::${dimensions}` } + private vectorStoreWarmupKey( + agentId: string, + embedding: { providerId: string; modelId: string } + ): string { + 
return `${agentId}::${embedding.providerId}::${embedding.modelId}` + } + + private isVectorStoreWarm( + agentId: string, + embedding: { providerId: string; modelId: string } + ): boolean { + const readyIdentity = this.vectorStoreReady.get(agentId) + if (!readyIdentity) return false + if (this.vectorStoreIdentities.get(agentId) !== readyIdentity) return false + if (!this.vectorStores.has(agentId)) return false + // The warmup key is the 3-part identity prefix; the ready/cache key appends the dimension. + return readyIdentity.startsWith(`${this.vectorStoreWarmupKey(agentId, embedding)}::`) + } + + private markVectorStoreReady( + agentId: string, + embedding: { providerId: string; modelId: string }, + dimensions: number + ): void { + this.vectorStoreReady.set(agentId, this.vectorStoreCacheKey(agentId, embedding, dimensions)) + } + + private clearVectorStoreReady(agentId: string): void { + this.vectorStoreReady.delete(agentId) + } + + private resolveStoredCurrentEmbeddingDimension( + agentId: string, + fingerprint: string + ): number | null { + return this.deps.repository.getCurrentEmbeddingDimension(agentId, fingerprint) + } + + private async resolveWarmVectorDimensions( + agentId: string, + embedding: { providerId: string; modelId: string } + ): Promise { + const fingerprint = embeddingFingerprint(embedding.providerId, embedding.modelId) + const storedDim = this.resolveStoredCurrentEmbeddingDimension(agentId, fingerprint) + const key = this.vectorStoreWarmupKey(agentId, embedding) + if (storedDim !== null) { + this.vectorStoreDimensionFailures.delete(key) + return storedDim + } + const lastFailureAt = this.vectorStoreDimensionFailures.get(key) + if ( + lastFailureAt !== undefined && + Date.now() - lastFailureAt < WARM_DIMENSION_FAILURE_COOLDOWN_MS + ) { + throw new Error( + `[Memory] embedding dimension warm is cooling down for ${embedding.providerId}/${embedding.modelId}` + ) + } + + try { + const attrs = await this.deps.getDimensions(embedding.providerId, 
embedding.modelId) + const dimensions = attrs.data.dimensions + if (!Number.isFinite(dimensions) || dimensions <= 0) { + throw new Error( + attrs.errorMsg ?? + `[Memory] invalid embedding dimension for ${embedding.providerId}/${embedding.modelId}` + ) + } + this.vectorStoreDimensionFailures.delete(key) + return dimensions + } catch (error) { + this.vectorStoreDimensionFailures.set(key, Date.now()) + throw error + } + } + + private warmVectorStore( + agentId: string, + embedding: { providerId: string; modelId: string } + ): Promise { + if (this.disposed || !this.canUseCurrentMemoryEmbedding(agentId, embedding)) + return Promise.resolve() + const key = this.vectorStoreWarmupKey(agentId, embedding) + const inflight = this.vectorStoreWarmups.get(key) + if (inflight) return inflight + + const tracked = Promise.resolve() + .then(() => this.runWarmVectorStore(agentId, embedding)) + .catch((error) => { + this.clearVectorStoreReady(agentId) + logger.warn(`[Memory] vector store warm failed for ${agentId}: ${String(error)}`) + }) + .finally(() => { + if (this.vectorStoreWarmups.get(key) === tracked) this.vectorStoreWarmups.delete(key) + }) + this.vectorStoreWarmups.set(key, tracked) + return tracked + } + + private async runWarmVectorStore( + agentId: string, + embedding: { providerId: string; modelId: string } + ): Promise { + if (!this.canUseCurrentMemoryEmbedding(agentId, embedding)) return + const dimensions = await this.resolveWarmVectorDimensions(agentId, embedding) + if (!this.canUseCurrentMemoryEmbedding(agentId, embedding)) return + + const store = await this.getVectorStore(agentId, embedding, dimensions) + if (!this.canUseCurrentMemoryEmbedding(agentId, embedding)) return + + if (!store.isUsable()) { + this.clearVectorStoreReady(agentId) + if (!this.reindexing.has(agentId)) { + void this.reindexEmbeddings(agentId, true).catch((error) => { + logger.warn(`[Memory] store rebuild failed for ${agentId}: ${String(error)}`) + }) + } + return + } + + const fingerprint = 
embeddingFingerprint(embedding.providerId, embedding.modelId) + if (this.hasStaleEmbeddings(agentId, dimensions, fingerprint)) { + this.clearVectorStoreReady(agentId) + void this.reindexEmbeddings(agentId).catch((error) => { + logger.warn(`[Memory] reindex failed for ${agentId}: ${String(error)}`) + }) + return + } + + this.markVectorStoreReady(agentId, embedding, dimensions) + if (!this.reindexing.has(agentId)) { + void this.backfillEmbeddings(agentId).catch((error) => { + logger.warn(`[Memory] backfill failed for ${agentId}: ${String(error)}`) + }) + } + } + + private warmEmbeddingConnection( + agentId: string, + embedding: { providerId: string; modelId: string } + ): void { + if (this.disposed || !this.canUseCurrentMemoryEmbedding(agentId, embedding)) return + const key = this.vectorStoreWarmupKey(agentId, embedding) + if (this.embeddingWarmups.has(key)) return + const tracked = Promise.resolve() + .then(async () => { + await this.deps.getEmbeddings(embedding.providerId, embedding.modelId, [ + EMBEDDING_PREWARM_TEXT + ]) + }) + .catch((error) => { + logger.warn(`[Memory] embedding warm failed for ${agentId}: ${String(error)}`) + }) + .finally(() => { + if (this.embeddingWarmups.get(key) === tracked) this.embeddingWarmups.delete(key) + }) + this.embeddingWarmups.set(key, tracked) + } + /** Serialize open/close/reset of an agent's single sidecar file so it is never opened twice. */ private runExclusiveForAgent(agentId: string, task: () => Promise): Promise { const prev = this.vectorStoreLocks.get(agentId) ?? Promise.resolve() @@ -2403,8 +2691,12 @@ export class MemoryPresenter implements MemoryRuntimePort { /** Close and evict the agent's cached store (caller must hold the per-agent lock). 
*/ private async closeVectorStore(agentId: string): Promise { + this.clearVectorStoreReady(agentId) const pending = this.vectorStores.get(agentId) - if (!pending) return + if (!pending) { + this.vectorStoreIdentities.delete(agentId) + return + } this.vectorStores.delete(agentId) this.vectorStoreIdentities.delete(agentId) const store = await pending.catch(() => null) @@ -2436,6 +2728,7 @@ export class MemoryPresenter implements MemoryRuntimePort { const pending = this.deps.createVectorStore(agentId, embedding, dimensions).catch((error) => { this.vectorStores.delete(agentId) this.vectorStoreIdentities.delete(agentId) + this.clearVectorStoreReady(agentId) throw error }) this.vectorStores.set(agentId, pending) diff --git a/src/main/presenter/memoryPresenter/memoryVectorStore.ts b/src/main/presenter/memoryPresenter/memoryVectorStore.ts index a69948cb9..8148058a3 100644 --- a/src/main/presenter/memoryPresenter/memoryVectorStore.ts +++ b/src/main/presenter/memoryPresenter/memoryVectorStore.ts @@ -1,6 +1,9 @@ import logger from '@shared/logger' +import { createHash, randomUUID } from 'node:crypto' import fs from 'node:fs' import path from 'node:path' +import { promisify } from 'node:util' +import { gunzip } from 'node:zlib' import { DuckDBConnection, DuckDBInstance, arrayValue } from '@duckdb/node-api' import { app } from 'electron' @@ -17,6 +20,18 @@ const runtimeBasePath = path .replace('app.asar', 'app.asar.unpacked') const extensionDir = path.join(runtimeBasePath, 'duckdb', 'extensions') const extensionSuffix = '.duckdb_extension' +const VSS_EXTENSION_NAME = `vss${extensionSuffix}` +const PACKAGED_VSS_ASSET_SUFFIX = '.b64' +const GUNZIP_ASYNC = promisify(gunzip) +const PACKAGED_VSS_MATERIALIZATION_PROMISES = new Map>() + +function escapeSqlPath(filePath: string): string { + return filePath.replace(/\\/g, '\\\\').replace(/'/g, "''") +} + +function materializationCacheKey(assetPath: string, materializationRoot: string): string { + return 
`${path.resolve(assetPath)}\0${path.resolve(materializationRoot)}` +} interface EmbeddingIdentity { providerId: string @@ -47,10 +62,15 @@ export class MemoryVectorStore implements IMemoryVectorStore { fs.mkdirSync(parentDir, { recursive: true }) } const store = new MemoryVectorStore(dbPath, metric) - if (fs.existsSync(dbPath)) { - await store.open(dimensions, embedding) - } else { - await store.initialize(dimensions, embedding) + try { + if (fs.existsSync(dbPath)) { + await store.open(dimensions, embedding) + } else { + await store.initialize(dimensions, embedding) + } + } catch (error) { + await store.close().catch(() => undefined) + throw error } return store } @@ -64,15 +84,124 @@ export class MemoryVectorStore implements IMemoryVectorStore { this.connection = await this.dbInstance.connect() } + private async loadVssFromPath(extensionPath: string, source: string): Promise { + await this.connection.run(`LOAD '${escapeSqlPath(extensionPath)}';`) + logger.info(`[MemoryVectorStore] loaded ${source} VSS extension: ${extensionPath}`) + await this.connection.run('SET hnsw_enable_experimental_persistence = true;') + } + + private async inflatePackagedVssExtension( + assetPath: string, + materializationRoot: string + ): Promise { + const asset = await fs.promises.readFile(assetPath) + const digest = createHash('sha256').update(asset).digest('hex').slice(0, 16) + const targetDir = path.join(materializationRoot, 'duckdb', 'extensions', digest) + const targetPath = path.join(targetDir, VSS_EXTENSION_NAME) + + if (fs.existsSync(targetPath)) { + return targetPath + } + + await fs.promises.mkdir(targetDir, { recursive: true }) + const tempPath = path.join( + targetDir, + `.${VSS_EXTENSION_NAME}.${process.pid}.${randomUUID()}.tmp` + ) + try { + const compressed = Buffer.from(asset.toString('utf8'), 'base64') + await fs.promises.writeFile(tempPath, await GUNZIP_ASYNC(compressed)) + if (fs.existsSync(targetPath)) { + await fs.promises.rm(tempPath, { force: true }) + return 
targetPath + } + await fs.promises.rename(tempPath, targetPath) + } catch (error) { + if (fs.existsSync(targetPath)) { + try { + await fs.promises.rm(tempPath, { force: true }) + } catch { + // best effort cleanup only + } + return targetPath + } + try { + await fs.promises.rm(tempPath, { force: true }) + } catch { + // best effort cleanup only + } + throw error + } + return targetPath + } + + private async materializePackagedVssExtension(assetPath: string): Promise { + const resolvedAssetPath = path.resolve(assetPath) + const materializationRoot = path.resolve(app.getPath('userData') || path.dirname(this.dbPath)) + const cacheKey = materializationCacheKey(resolvedAssetPath, materializationRoot) + const existing = PACKAGED_VSS_MATERIALIZATION_PROMISES.get(cacheKey) + if (existing) { + const existingPath = await existing + if (fs.existsSync(existingPath)) { + return existingPath + } + if (PACKAGED_VSS_MATERIALIZATION_PROMISES.get(cacheKey) === existing) { + PACKAGED_VSS_MATERIALIZATION_PROMISES.delete(cacheKey) + } else { + return this.materializePackagedVssExtension(resolvedAssetPath) + } + } + + let materializationPromise: Promise + materializationPromise = this.inflatePackagedVssExtension( + resolvedAssetPath, + materializationRoot + ).catch((error) => { + if (PACKAGED_VSS_MATERIALIZATION_PROMISES.get(cacheKey) === materializationPromise) { + PACKAGED_VSS_MATERIALIZATION_PROMISES.delete(cacheKey) + } + throw error + }) + PACKAGED_VSS_MATERIALIZATION_PROMISES.set(cacheKey, materializationPromise) + return materializationPromise + } + private async loadVss(): Promise { - const extensionPath = path.join(extensionDir, `vss${extensionSuffix}`) + const extensionPath = path.join(extensionDir, VSS_EXTENSION_NAME) + const packagedAssetPath = `${extensionPath}${PACKAGED_VSS_ASSET_SUFFIX}` if (fs.existsSync(extensionPath)) { - const escapedPath = extensionPath.replace(/\\/g, '\\\\') - await this.connection.run(`LOAD '${escapedPath}';`) + try { + await 
this.loadVssFromPath(extensionPath, 'bundled') + return + } catch (error) { + const message = `[MemoryVectorStore] bundled VSS extension failed to load from ${extensionPath}: ${String(error)}` + if (app.isPackaged) { + logger.error(`${message}. Vector recall disabled until a valid bundled extension ships.`) + throw error + } + logger.warn(`${message}; falling back to network INSTALL vss in development.`) + } + } else if (app.isPackaged && fs.existsSync(packagedAssetPath)) { + try { + const materializedPath = await this.materializePackagedVssExtension(packagedAssetPath) + await this.loadVssFromPath(materializedPath, 'materialized packaged') + return + } catch (error) { + logger.error( + `[MemoryVectorStore] packaged VSS extension failed to materialize/load from ${packagedAssetPath}: ${String(error)}. Vector recall disabled until a valid bundled extension ships.` + ) + throw error + } } else { - await this.connection.run('INSTALL vss;') - await this.connection.run('LOAD vss;') + const message = `[MemoryVectorStore] bundled VSS extension missing at ${extensionPath} or ${packagedAssetPath}. 
Run installRuntime:duckdb:vss before packaging.` + if (app.isPackaged) { + logger.error(`${message} Vector recall disabled until a valid bundled extension ships.`) + throw new Error(message) + } + logger.warn(`${message} Falling back to network INSTALL vss in development.`) } + await this.connection.run('INSTALL vss;') + await this.connection.run('LOAD vss;') await this.connection.run('SET hnsw_enable_experimental_persistence = true;') } diff --git a/src/main/presenter/memoryPresenter/types.ts b/src/main/presenter/memoryPresenter/types.ts index d7d89715e..e724e73e9 100644 --- a/src/main/presenter/memoryPresenter/types.ts +++ b/src/main/presenter/memoryPresenter/types.ts @@ -18,6 +18,7 @@ import type { DeepChatAgentMemoryRetrieval } from '@shared/types/agent-interface' import type { AgentMemoryCategory } from '@shared/types/agent-memory' +import type { LLM_EMBEDDING_ATTRS } from '@shared/presenter' export type { AgentMemoryKind, @@ -82,6 +83,8 @@ export interface MemoryRepositoryPort { setConflictWith(id: string, targetId: string | null): void setLastConsolidatedAt(id: string, at?: number): void getLastConsolidatedAt(agentId: string): number | null + getCurrentEmbeddingDimension(agentId: string, fingerprint: string): number | null + hasStaleEmbeddings(agentId: string, currentDim: number, fingerprint: string): boolean archive(id: string, at?: number): void listArchiveCandidates(agentId: string, before: number, decayBelow: number): AgentMemoryRow[] delete(id: string): void @@ -257,6 +260,10 @@ export interface MemoryPresenterDeps { // reads/writes against arbitrary or nonexistent agents; skipped when absent (e.g. tests). 
isManagedAgent?: (agentId: string) => boolean getEmbeddings: (providerId: string, modelId: string, texts: string[]) => Promise + getDimensions: ( + providerId: string, + modelId: string + ) => Promise<{ data: LLM_EMBEDDING_ATTRS; errorMsg?: string }> generateText: (providerId: string, modelId: string, prompt: string) => Promise // Creates/opens the agent's vector store: embedding identity validates it, dimensions seed // the first initialization. diff --git a/src/main/presenter/remoteControlPresenter/adapters/feishu/FeishuAdapter.ts b/src/main/presenter/remoteControlPresenter/adapters/feishu/FeishuAdapter.ts index 36a5c8281..b9edb372b 100644 --- a/src/main/presenter/remoteControlPresenter/adapters/feishu/FeishuAdapter.ts +++ b/src/main/presenter/remoteControlPresenter/adapters/feishu/FeishuAdapter.ts @@ -35,6 +35,7 @@ export class FeishuAdapter extends ChannelAdapter { verificationToken: string encryptKey: string } + private readonly enableStreamingCards: boolean private client: FeishuClient | null = null private runtime: FeishuRuntime | null = null private feishuStatus: FeishuRuntimeStatusSnapshot = { ...DEFAULT_STATUS } @@ -53,6 +54,7 @@ export class FeishuAdapter extends ChannelAdapter { verificationToken: String(config.channelConfig.verificationToken ?? '').trim(), encryptKey: String(config.channelConfig.encryptKey ?? 
'').trim() } + this.enableStreamingCards = config.channelConfig.enableStreamingCards === true } protected async performConnect(_signal: AbortSignal): Promise { @@ -71,6 +73,7 @@ export class FeishuAdapter extends ChannelAdapter { getRuntimeStatus: () => ({ ...this.feishuStatus }) }), bindingStore: this.bindingStore, + enableStreamingCards: this.enableStreamingCards, logger, onStatusChange: (snapshot) => { this.handleStatusChange(snapshot) diff --git a/src/main/presenter/remoteControlPresenter/feishu/feishuAuth.ts b/src/main/presenter/remoteControlPresenter/feishu/feishuAuth.ts new file mode 100644 index 000000000..3cd6b566c --- /dev/null +++ b/src/main/presenter/remoteControlPresenter/feishu/feishuAuth.ts @@ -0,0 +1,335 @@ +import type { FeishuBrand } from '@shared/presenter' + +export const FEISHU_AUTH_CALLBACK_PATH = '/remote/feishu/auth/callback' +export const FEISHU_AUTH_DEFAULT_PORT = 32178 +export const FEISHU_AUTH_SCOPE = '' + +export class FeishuOAuthError extends Error { + readonly exposeToUser = false + + constructor(message: string) { + super(message) + this.name = 'FeishuOAuthError' + } +} + +export interface FeishuOAuthCredentials { + brand: FeishuBrand + appId: string + appSecret: string + redirectUri: string +} + +export interface FeishuOAuthUserInfo { + openId: string + unionId?: string + name?: string +} + +export interface FeishuPersonalAgentRegistrationStart { + installUrl: string + deviceCode: string + userCode: string + intervalSec: number + expireInSec: number +} + +export interface FeishuPersonalAgentRegistrationPoll { + ok: boolean + status: number + data: Record +} + +type FeishuOAuthTokenResponse = { + code?: number + msg?: string + error?: string + error_description?: string + data?: { + access_token?: string + token_type?: string + scope?: string + } + access_token?: string + token_type?: string + scope?: string +} + +type FeishuUserInfoResponse = { + code?: number + msg?: string + error?: string + error_description?: string + data?: 
Record + open_id?: string + union_id?: string + name?: string +} + +const FEISHU_ACCOUNTS_BASE_URL = 'https://accounts.feishu.cn' +const FEISHU_OPEN_BASE_URL = 'https://open.feishu.cn' +const LARK_ACCOUNTS_BASE_URL = 'https://accounts.larksuite.com' +const LARK_OPEN_BASE_URL = 'https://open.larksuite.com' +const FEISHU_PERSONAL_AGENT_REGISTRATION_PATH = '/oauth/v1/app/registration' +const FEISHU_REQUEST_TIMEOUT_MS = 10_000 + +const createFeishuFetchSignal = (signal?: AbortSignal): AbortSignal => { + const timeoutSignal = AbortSignal.timeout(FEISHU_REQUEST_TIMEOUT_MS) + return signal ? AbortSignal.any([timeoutSignal, signal]) : timeoutSignal +} + +export const createDefaultFeishuAuthRedirectUri = (): string => + `http://127.0.0.1:${FEISHU_AUTH_DEFAULT_PORT}${FEISHU_AUTH_CALLBACK_PATH}` + +export const resolveFeishuAuthDomains = ( + brand: FeishuBrand +): { + accountsBaseUrl: string + openBaseUrl: string + developerConsoleUrl: string + appCreationUrl: string + echoBotTutorialUrl: string +} => { + if (brand === 'lark') { + return { + accountsBaseUrl: LARK_ACCOUNTS_BASE_URL, + openBaseUrl: LARK_OPEN_BASE_URL, + developerConsoleUrl: `${LARK_OPEN_BASE_URL}/app`, + appCreationUrl: `${LARK_OPEN_BASE_URL}/app`, + echoBotTutorialUrl: `${LARK_OPEN_BASE_URL}/document/develop-an-echo-bot/introduction` + } + } + + return { + accountsBaseUrl: FEISHU_ACCOUNTS_BASE_URL, + openBaseUrl: FEISHU_OPEN_BASE_URL, + developerConsoleUrl: `${FEISHU_OPEN_BASE_URL}/app`, + appCreationUrl: `${FEISHU_OPEN_BASE_URL}/app`, + echoBotTutorialUrl: `${FEISHU_OPEN_BASE_URL}/document/develop-an-echo-bot/introduction` + } +} + +export const buildFeishuAuthUrl = (credentials: FeishuOAuthCredentials, state: string): string => { + const domains = resolveFeishuAuthDomains(credentials.brand) + const params = new URLSearchParams({ + client_id: credentials.appId, + redirect_uri: credentials.redirectUri, + response_type: 'code', + state + }) + if (FEISHU_AUTH_SCOPE) { + params.set('scope', FEISHU_AUTH_SCOPE) + } 
+ + return `${domains.accountsBaseUrl}/open-apis/authen/v1/authorize?${params.toString()}` +} + +const asRecord = (value: unknown): Record => { + if (typeof value === 'object' && value !== null && !Array.isArray(value)) { + return value as Record + } + + return {} +} + +const recordString = (record: Record, key: string): string => { + const value = record[key] + return typeof value === 'string' ? value.trim() : '' +} + +const normalizeIntervalSeconds = (value: unknown, fallback: number): number => { + const parsed = Number(value) + return Number.isFinite(parsed) ? Math.max(1, Math.floor(parsed)) : fallback +} + +const readJsonRecord = async (response: Response): Promise> => { + const text = await response.text() + try { + return asRecord(JSON.parse(text) as unknown) + } catch { + return { message: text.trim() || response.statusText } + } +} + +const feishuRegistrationAccountsBaseUrl = (brand: FeishuBrand): string => + brand === 'lark' ? LARK_ACCOUNTS_BASE_URL : FEISHU_ACCOUNTS_BASE_URL + +const postFeishuRegistrationForm = async ( + baseUrl: string, + body: Record, + signal?: AbortSignal +): Promise<{ ok: boolean; status: number; data: Record }> => { + const response = await fetch(`${baseUrl}${FEISHU_PERSONAL_AGENT_REGISTRATION_PATH}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded' + }, + body: new URLSearchParams(body).toString(), + signal: createFeishuFetchSignal(signal) + }) + + return { + ok: response.ok, + status: response.status, + data: await readJsonRecord(response) + } +} + +const parseJsonResponse = async (response: Response, fallbackMessage: string): Promise => { + let payload: unknown + try { + payload = await response.json() + } catch { + throw new FeishuOAuthError(fallbackMessage) + } + + if (!response.ok) { + const record = payload as { msg?: string; error_description?: string; error?: string } + throw new FeishuOAuthError( + record?.msg?.trim() || + record?.error_description?.trim() || + record?.error?.trim() 
|| + `${fallbackMessage} (${response.status})` + ) + } + + return payload as T +} + +const assertFeishuApiSuccess = ( + payload: { code?: number; msg?: string; error?: string; error_description?: string }, + fallbackMessage: string +): void => { + if (typeof payload.code === 'number' && payload.code !== 0) { + throw new FeishuOAuthError(payload.msg?.trim() || fallbackMessage) + } + + if (payload.error) { + throw new FeishuOAuthError( + payload.error_description?.trim() || payload.error.trim() || fallbackMessage + ) + } +} + +export const exchangeFeishuOAuthCode = async ( + credentials: FeishuOAuthCredentials, + code: string, + signal?: AbortSignal +): Promise => { + const domains = resolveFeishuAuthDomains(credentials.brand) + const payload = await parseJsonResponse( + await fetch(`${domains.openBaseUrl}/open-apis/authen/v2/oauth/token`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json; charset=utf-8' + }, + body: JSON.stringify({ + grant_type: 'authorization_code', + client_id: credentials.appId, + client_secret: credentials.appSecret, + code, + redirect_uri: credentials.redirectUri + }), + signal: createFeishuFetchSignal(signal) + }), + 'Failed to exchange Feishu authorization code.' + ) + + assertFeishuApiSuccess(payload, 'Failed to exchange Feishu authorization code.') + + const accessToken = (payload.data?.access_token ?? payload.access_token ?? 
'').trim() + if (!accessToken) { + throw new FeishuOAuthError('Feishu authorization response did not include a user access token.') + } + + return accessToken +} + +export const fetchFeishuOAuthUserInfo = async ( + brand: FeishuBrand, + accessToken: string, + signal?: AbortSignal +): Promise => { + const domains = resolveFeishuAuthDomains(brand) + const payload = await parseJsonResponse( + await fetch(`${domains.openBaseUrl}/open-apis/authen/v1/user_info`, { + method: 'GET', + headers: { + Authorization: `Bearer ${accessToken}` + }, + signal: createFeishuFetchSignal(signal) + }), + 'Failed to fetch Feishu user info.' + ) + + assertFeishuApiSuccess(payload, 'Failed to fetch Feishu user info.') + + const data = payload.data ?? payload + const openId = String(data.open_id ?? '').trim() + if (!openId) { + throw new FeishuOAuthError('Feishu user info response did not include open_id.') + } + + const unionId = String(data.union_id ?? '').trim() + const name = String(data.name ?? '').trim() + + return { + openId, + ...(unionId ? { unionId } : {}), + ...(name ? 
{ name } : {}) + } +} + +export const startFeishuPersonalAgentRegistration = async ( + signal?: AbortSignal +): Promise => { + const response = await postFeishuRegistrationForm( + FEISHU_ACCOUNTS_BASE_URL, + { + action: 'begin', + archetype: 'PersonalAgent', + auth_method: 'client_secret', + request_user_info: 'open_id tenant_brand' + }, + signal + ) + + if (!response.ok) { + throw new FeishuOAuthError('Failed to start Feishu PersonalAgent registration.') + } + + const installUrl = recordString(response.data, 'verification_uri_complete') + const deviceCode = recordString(response.data, 'device_code') + const userCode = recordString(response.data, 'user_code') + if (!installUrl || !deviceCode) { + throw new FeishuOAuthError('Feishu PersonalAgent registration response was incomplete.') + } + + return { + installUrl, + deviceCode, + userCode, + intervalSec: normalizeIntervalSeconds(response.data.interval, 5), + expireInSec: normalizeIntervalSeconds( + response.data.expire_in ?? response.data.expires_in, + 5 * 60 + ) + } +} + +export const pollFeishuPersonalAgentRegistration = async ( + brand: FeishuBrand, + deviceCode: string, + signal?: AbortSignal +): Promise => + postFeishuRegistrationForm( + feishuRegistrationAccountsBaseUrl(brand), + { + action: 'poll', + device_code: deviceCode + }, + signal + ) + +export const readFeishuRegistrationString = recordString +export const asFeishuRegistrationRecord = asRecord diff --git a/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts b/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts index dbb28c584..b1c42032c 100644 --- a/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts +++ b/src/main/presenter/remoteControlPresenter/feishu/feishuClient.ts @@ -24,6 +24,14 @@ type FeishuMessageResponse = { } } +type FeishuApiResponse = { + code?: number + msg?: string + data?: Record +} + +export const FEISHU_STREAMING_CARD_ELEMENT_ID = 'md_stream' + const createTextPayload = (text: string): string 
=> JSON.stringify({ text @@ -38,6 +46,49 @@ const createMarkdownPayload = (text: string): string => const createCardPayload = (card: FeishuInteractiveCardPayload): string => JSON.stringify(card) +const createStreamingCardEntityMessagePayload = (cardId: string): string => + JSON.stringify({ + type: 'card', + data: { + card_id: cardId + } + }) + +const createStreamingCardJson = (initialContent: string): Record => ({ + schema: '2.0', + config: { + streaming_mode: true, + update_multi: true, + summary: { + content: '' + }, + streaming_config: { + print_frequency_ms: { + default: 70, + android: 70, + ios: 70, + pc: 70 + }, + print_step: { + default: 1, + android: 1, + ios: 1, + pc: 1 + }, + print_strategy: 'fast' + } + }, + body: { + elements: [ + { + tag: 'markdown', + content: initialContent, + element_id: FEISHU_STREAMING_CARD_ELEMENT_ID + } + ] + } +}) + const readHeaderValue = (headers: unknown, name: string): string | undefined => { if (!headers) { return undefined @@ -90,6 +141,12 @@ const resolveLarkDomain = (brand: FeishuBrand): string | undefined => { return ((Lark as any).Domain?.Feishu as string | undefined) ?? 
'https://open.feishu.cn' } +const assertFeishuApiSuccess = (response: FeishuApiResponse | null | undefined, action: string) => { + if (!response || response.code !== 0) { + throw new Error(response?.msg?.trim() || `Feishu CardKit ${action} failed.`) + } +} + export const chunkFeishuText = ( text: string, limit: number = FEISHU_OUTBOUND_TEXT_LIMIT @@ -276,6 +333,106 @@ export class FeishuClient { return messageId } + async createStreamingCard(initialContent: string = ''): Promise<{ + cardId: string + elementId: string + }> { + const response = (await (this.sdk as any).request({ + method: 'POST', + url: '/open-apis/cardkit/v1/cards', + data: { + type: 'card_json', + data: JSON.stringify(createStreamingCardJson(initialContent)) + } + })) as FeishuApiResponse & { + card_id?: string + } + + assertFeishuApiSuccess(response, 'create streaming card') + const cardId = String(response?.data?.card_id ?? response?.card_id ?? '').trim() + if (!cardId) { + throw new Error('Feishu CardKit create streaming card did not return card_id.') + } + + return { + cardId, + elementId: FEISHU_STREAMING_CARD_ELEMENT_ID + } + } + + async sendCardEntity(target: FeishuTransportTarget, cardId: string): Promise { + const content = createStreamingCardEntityMessagePayload(cardId) + + if (target.replyToMessageId) { + const response = (await this.sdk.im.message.reply({ + path: { + message_id: target.replyToMessageId + }, + data: { + content, + msg_type: 'interactive', + reply_in_thread: Boolean(target.threadId) + } + })) as FeishuMessageResponse + const messageId = response.data?.message_id?.trim() + if (!messageId) { + throw new Error('Feishu CardKit send card entity did not return message_id.') + } + return messageId + } + + const response = (await this.sdk.im.message.create({ + params: { + receive_id_type: 'chat_id' + }, + data: { + receive_id: target.chatId, + msg_type: 'interactive', + content + } + })) as FeishuMessageResponse + const messageId = response.data?.message_id?.trim() + if 
(!messageId) { + throw new Error('Feishu CardKit send card entity did not return message_id.') + } + return messageId + } + + async updateStreamingCardContent(params: { + cardId: string + elementId: string + content: string + sequence: number + }): Promise { + const response = (await (this.sdk as any).request({ + method: 'PUT', + url: `/open-apis/cardkit/v1/cards/${encodeURIComponent(params.cardId)}/elements/${encodeURIComponent(params.elementId)}/content`, + data: { + content: params.content, + sequence: params.sequence + } + })) as FeishuApiResponse + + assertFeishuApiSuccess(response, 'update streaming card content') + } + + async closeStreamingCard(cardId: string, sequence: number): Promise { + const response = (await (this.sdk as any).request({ + method: 'PATCH', + url: `/open-apis/cardkit/v1/cards/${encodeURIComponent(cardId)}/settings`, + data: { + settings: JSON.stringify({ + config: { + streaming_mode: false + } + }), + sequence + } + })) as FeishuApiResponse + + assertFeishuApiSuccess(response, 'close streaming card') + } + async downloadMessageResource(params: { messageId: string fileKey: string diff --git a/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts b/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts index 08d66c789..b7037d7cb 100644 --- a/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts +++ b/src/main/presenter/remoteControlPresenter/feishu/feishuRuntime.ts @@ -27,13 +27,19 @@ const sleep = async (ms: number): Promise => { await new Promise((resolve) => setTimeout(resolve, ms)) } +const safeErrorMessage = (error: unknown): string => + error instanceof Error ? error.message : String(error) + const FEISHU_INTERNAL_ERROR_REPLY = 'An internal error occurred while processing your request.' +const FEISHU_STREAMING_CARD_FALLBACK_NOTICE = + 'Feishu CardKit streaming failed. Falling back to normal message updates. Check that the app has im:message and cardkit:card:write permissions.' 
type FeishuRuntimeDeps = { client: FeishuClient parser: FeishuParser router: FeishuCommandRouter bindingStore: RemoteBindingStore + enableStreamingCards?: boolean logger?: { error: (...params: unknown[]) => void } @@ -56,6 +62,14 @@ type FeishuRemoteDeliveryState = { }> } +type FeishuStreamingCardDeliveryState = { + cardId: string + elementId: string + sequence: number + lastText: string + closed: boolean +} + export class FeishuRuntime { private runId = 0 private started = false @@ -403,6 +417,19 @@ export class FeishuRuntime { const startedAt = Date.now() const endpointKey = buildFeishuEndpointKey(target.chatId, target.threadId) + if (this.deps.enableStreamingCards) { + const streamed = await this.deliverConversationWithStreamingCard( + target, + execution, + runId, + endpointKey, + startedAt + ) + if (streamed) { + return + } + } + while (this.isCurrentRun(runId)) { const snapshot = await execution.getSnapshot() if (!this.isCurrentRun(runId)) { @@ -503,6 +530,244 @@ export class FeishuRuntime { } } + private async deliverConversationWithStreamingCard( + target: FeishuTransportTarget, + execution: RemoteConversationExecution, + runId: number, + endpointKey: string, + startedAt: number + ): Promise { + let cardState: FeishuStreamingCardDeliveryState | null = null + const closeAndFinish = async (): Promise => { + try { + cardState = await this.closeStreamingCardIfNeeded(cardState) + } catch (error) { + console.warn('[FeishuRuntime] Failed to close streaming card before exit:', { + cardId: cardState?.cardId, + error: safeErrorMessage(error) + }) + } + return true + } + + try { + while (this.isCurrentRun(runId)) { + const snapshot = await execution.getSnapshot() + if (!this.isCurrentRun(runId)) { + return await closeAndFinish() + } + + const sourceMessageId = snapshot.messageId ?? execution.eventId ?? 
null + let deliverySegments = this.getSnapshotDeliverySegments(snapshot, sourceMessageId) + + if (snapshot.completed) { + if (snapshot.pendingInteraction) { + const pendingText = this.buildStreamingCardText(deliverySegments) + if (pendingText) { + cardState = await this.syncStreamingCardText(target, cardState, pendingText) + } + cardState = await this.closeStreamingCardIfNeeded(cardState) + await this.dispatchOutboundActions( + target, + [ + { + type: 'sendCard', + card: buildFeishuPendingInteractionCard(snapshot.pendingInteraction), + fallbackText: buildFeishuPendingInteractionText(snapshot.pendingInteraction) + } + ], + runId + ) + this.deps.bindingStore.clearRemoteDeliveryState(endpointKey) + return true + } + + const finalText = this.getFinalDeliveryText(snapshot) + deliverySegments = this.appendTerminalDeliverySegment( + deliverySegments, + sourceMessageId, + finalText + ) + const completedText = this.buildStreamingCardText(deliverySegments) || finalText.trim() + if (completedText) { + cardState = await this.syncStreamingCardText(target, cardState, completedText) + } + cardState = await this.closeStreamingCardIfNeeded(cardState) + this.deps.bindingStore.clearRemoteDeliveryState(endpointKey) + await this.sendGeneratedImages(target, snapshot) + return true + } + + if (Date.now() - startedAt >= FEISHU_CONVERSATION_POLL_TIMEOUT_MS) { + if (!this.isCurrentRun(runId)) { + return await closeAndFinish() + } + const timeoutText = + 'The current conversation timed out before finishing. Please try again.' 
+ const timeoutSegments = this.appendTerminalDeliverySegment( + deliverySegments, + sourceMessageId, + timeoutText + ) + cardState = await this.syncStreamingCardText( + target, + cardState, + this.buildStreamingCardText(timeoutSegments) || timeoutText + ) + cardState = await this.closeStreamingCardIfNeeded(cardState) + this.deps.bindingStore.clearRemoteDeliveryState(endpointKey) + return true + } + + const streamingText = this.buildStreamingCardText(deliverySegments, snapshot.statusText) + if (streamingText) { + cardState = await this.syncStreamingCardText(target, cardState, streamingText) + } + + await sleep(TELEGRAM_STREAM_POLL_INTERVAL_MS) + } + + return await closeAndFinish() + } catch (error) { + console.warn('[FeishuRuntime] Streaming card delivery failed, falling back to markdown:', { + chatId: target.chatId, + threadId: target.threadId, + replyToMessageId: target.replyToMessageId, + error: safeErrorMessage(error) + }) + if (this.isCurrentRun(runId)) { + try { + await this.deps.client.sendText(target, FEISHU_STREAMING_CARD_FALLBACK_NOTICE) + } catch (noticeError) { + console.warn('[FeishuRuntime] Failed to send streaming card fallback notice:', { + chatId: target.chatId, + threadId: target.threadId, + replyToMessageId: target.replyToMessageId, + error: safeErrorMessage(noticeError) + }) + } + } + return false + } + } + + private buildStreamingCardText(segments: RemoteDeliverySegment[], statusText?: string): string { + const status = statusText?.trim() ?? '' + const processText = segments + .filter((segment) => segment.kind === 'process') + .map((segment) => segment.text.trim()) + .filter(Boolean) + .join('\n\n') + const answerText = segments + .filter((segment) => segment.kind !== 'process') + .map((segment) => segment.text.trim()) + .filter(Boolean) + .join('\n\n') + + return [ + status ? `**Status**\n${status}` : '', + processText ? `**Process**\n${processText}` : '', + answerText ? 
`**Answer**\n${answerText}` : '' + ] + .filter(Boolean) + .join('\n\n') + .trim() + } + + private async syncStreamingCardText( + target: FeishuTransportTarget, + state: FeishuStreamingCardDeliveryState | null, + text: string + ): Promise { + const normalized = optimizeMarkdownForFeishu(text.trim()) + if (!normalized) { + if (state) { + return state + } + throw new Error('Feishu streaming card content is empty.') + } + + const nextState = state ?? (await this.createStreamingCardState(target)) + if (nextState.lastText === normalized) { + return nextState + } + + const sequence = nextState.sequence + 1 + try { + await this.deps.client.updateStreamingCardContent({ + cardId: nextState.cardId, + elementId: nextState.elementId, + content: normalized, + sequence + }) + } catch (error) { + await this.closeStreamingCardAfterFailure(nextState, sequence + 1) + throw error + } + + return { + ...nextState, + sequence, + lastText: normalized + } + } + + private async createStreamingCardState( + target: FeishuTransportTarget + ): Promise { + const card = await this.deps.client.createStreamingCard('') + const state: FeishuStreamingCardDeliveryState = { + cardId: card.cardId, + elementId: card.elementId, + sequence: 0, + lastText: '', + closed: false + } + + try { + await this.deps.client.sendCardEntity(target, card.cardId) + } catch (error) { + await this.closeStreamingCardAfterFailure(state, state.sequence + 1) + throw error + } + + return state + } + + private async closeStreamingCardIfNeeded( + state: FeishuStreamingCardDeliveryState | null + ): Promise { + if (!state || state.closed) { + return state + } + + const sequence = state.sequence + 1 + await this.deps.client.closeStreamingCard(state.cardId, sequence) + return { + ...state, + sequence, + closed: true + } + } + + private async closeStreamingCardAfterFailure( + state: FeishuStreamingCardDeliveryState, + sequence: number + ): Promise { + if (state.closed) { + return + } + + try { + await 
this.deps.client.closeStreamingCard(state.cardId, sequence) + } catch (error) { + console.warn('[FeishuRuntime] Failed to close streaming card after failure:', { + cardId: state.cardId, + error: safeErrorMessage(error) + }) + } + } + private getStoredDeliveryState(endpointKey: string): FeishuRemoteDeliveryState | null { const state = this.deps.bindingStore.getRemoteDeliveryState(endpointKey) if (!state) { @@ -811,7 +1076,7 @@ export class FeishuRuntime { } catch (error) { console.warn( '[FeishuRuntime] Failed to send interactive card, falling back to text:', - error + safeErrorMessage(error) ) await this.deps.client.sendMarkdown(target, optimizeMarkdownForFeishu(action.fallbackText)) } diff --git a/src/main/presenter/remoteControlPresenter/index.ts b/src/main/presenter/remoteControlPresenter/index.ts index c4e1f2cd4..5a95ef097 100644 --- a/src/main/presenter/remoteControlPresenter/index.ts +++ b/src/main/presenter/remoteControlPresenter/index.ts @@ -1,10 +1,20 @@ import { BrowserWindow } from 'electron' +import { randomBytes } from 'node:crypto' +import * as http from 'node:http' import logger from '@shared/logger' import type { ChannelSettingsMap, DiscordPairingSnapshot, DiscordRemoteSettings, DiscordRemoteStatus, + FeishuAuthResult, + FeishuAuthSession, + FeishuAuthStartInput, + FeishuAuthWaitInput, + FeishuInstallResult, + FeishuInstallSession, + FeishuInstallStartInput, + FeishuInstallWaitInput, FeishuPairingSnapshot, FeishuRemoteSettings, FeishuRemoteStatus, @@ -57,10 +67,88 @@ import { DiscordAdapter } from './adapters/discord/DiscordAdapter' import { FeishuAdapter } from './adapters/feishu/FeishuAdapter' import { QQBotAdapter } from './adapters/qqbot/QQBotAdapter' import { WeixinIlinkAdapter } from './adapters/weixinIlink/WeixinIlinkAdapter' +import { + asFeishuRegistrationRecord, + buildFeishuAuthUrl, + createDefaultFeishuAuthRedirectUri, + exchangeFeishuOAuthCode, + fetchFeishuOAuthUserInfo, + pollFeishuPersonalAgentRegistration, + 
readFeishuRegistrationString, + resolveFeishuAuthDomains, + startFeishuPersonalAgentRegistration +} from './feishu/feishuAuth' import { WeixinIlinkClient } from './weixinIlink/weixinIlinkClient' const DEFAULT_CHANNEL_ID = 'default' const WEIXIN_TRACE_LOG_ENABLED = process.env.DEEPCHAT_WEIXIN_TRACE === '1' +const FEISHU_AUTH_SESSION_TTL_MS = 5 * 60 * 1000 +const FEISHU_AUTH_DEFAULT_WAIT_TIMEOUT_MS = 5 * 60 * 1000 +const FEISHU_INSTALL_DEFAULT_WAIT_TIMEOUT_MS = 5 * 60 * 1000 + +type FeishuAuthSessionState = { + sessionKey: string + state: string + brand: 'feishu' | 'lark' + appId: string + appSecret: string + redirectUri: string + authUrl: string + expiresAt: number + server: http.Server | null + window: BrowserWindow | null + cleanupTimer: NodeJS.Timeout | null + abortController: AbortController + resolve: (result: FeishuAuthResult) => void + resultPromise: Promise + completed: boolean +} + +type FeishuInstallSessionState = { + sessionKey: string + requestedBrand: 'feishu' | 'lark' + pollBrand: 'feishu' | 'lark' + deviceCode: string + installUrl: string + userCode: string + expiresAt: number + intervalMs: number + cleanupTimer: NodeJS.Timeout | null + abortController: AbortController + resolve: (result: FeishuInstallResult) => void + resultPromise: Promise + completed: boolean + polling: boolean +} + +const createFeishuAuthResult = ( + input: Omit & { + authorized: boolean + openId?: string | null + } +): FeishuAuthResult => ({ + authorized: input.authorized, + openId: input.openId ?? null, + ...(input.unionId ? { unionId: input.unionId } : {}), + ...(input.name ? { name: input.name } : {}), + ...(input.message ? { message: input.message } : {}), + ...(input.messageKey ? { messageKey: input.messageKey } : {}) +}) + +const createFeishuInstallResult = ( + input: Omit & { + installed: boolean + brand?: 'feishu' | 'lark' | null + appId?: string | null + } +): FeishuInstallResult => ({ + installed: input.installed, + brand: input.brand ?? null, + appId: input.appId ?? 
null, + ...(input.openId ? { openId: input.openId } : {}), + ...(input.message ? { message: input.message } : {}), + ...(input.messageKey ? { messageKey: input.messageKey } : {}) +}) const DEFAULT_TELEGRAM_POLLER_STATUS: TelegramPollerStatusSnapshot = { state: 'stopped', @@ -96,6 +184,8 @@ export class RemoteControlPresenter { private readonly bindingStore: RemoteBindingStore private readonly channelManager: ChannelManager private runtimeOperation: Promise = Promise.resolve() + private readonly feishuAuthSessions = new Map() + private readonly feishuInstallSessions = new Map() private weixinIlinkLoginWindow: BrowserWindow | null = null private weixinIlinkLoginWindowUrl: string | null = null private readonly weixinIlinkLoginWaits = new Map>() @@ -122,6 +212,12 @@ export class RemoteControlPresenter { await this.enqueueRuntimeOperation(async () => { await this.channelManager.unregisterAll() }) + for (const sessionKey of Array.from(this.feishuAuthSessions.keys())) { + await this.cancelFeishuAuth(sessionKey) + } + for (const sessionKey of Array.from(this.feishuInstallSessions.keys())) { + await this.cancelFeishuInstall(sessionKey) + } this.weixinIlinkLoginWaits.clear() this.closeWeixinIlinkLoginWindow() } @@ -146,6 +242,7 @@ export class RemoteControlPresenter { verificationToken: remoteConfig.verificationToken, encryptKey: remoteConfig.encryptKey, remoteEnabled: remoteConfig.enabled, + enableStreamingCards: remoteConfig.enableStreamingCards, defaultAgentId: remoteConfig.defaultAgentId, defaultWorkdir: remoteConfig.defaultWorkdir, pairedUserOpenIds: [...remoteConfig.pairedUserOpenIds] @@ -521,6 +618,7 @@ export class RemoteControlPresenter { currentRemoteConfig.appSecret !== normalized.appSecret || currentRemoteConfig.verificationToken !== normalized.verificationToken || currentRemoteConfig.encryptKey !== normalized.encryptKey || + currentRemoteConfig.enableStreamingCards !== normalized.enableStreamingCards || currentRemoteConfig.defaultWorkdir !== 
normalized.defaultWorkdir this.bindingStore.updateFeishuConfig((config) => ({ @@ -531,6 +629,7 @@ export class RemoteControlPresenter { verificationToken: normalized.verificationToken, encryptKey: normalized.encryptKey, enabled: normalized.remoteEnabled, + enableStreamingCards: normalized.enableStreamingCards, defaultAgentId, defaultWorkdir: normalized.defaultWorkdir, pairedUserOpenIds: config.pairedUserOpenIds, @@ -564,6 +663,265 @@ export class RemoteControlPresenter { } } + async startFeishuAuth(input: FeishuAuthStartInput = {}): Promise { + this.pruneExpiredFeishuAuthSessions() + for (const existingSessionKey of Array.from(this.feishuAuthSessions.keys())) { + await this.cancelFeishuAuth(existingSessionKey) + } + + const currentConfig = this.bindingStore.getFeishuConfig() + const brand = input.brand === 'lark' ? 'lark' : currentConfig.brand + const appId = input.appId?.trim() || currentConfig.appId.trim() + const appSecret = input.appSecret?.trim() || currentConfig.appSecret.trim() + const redirectUri = input.redirectUri?.trim() || createDefaultFeishuAuthRedirectUri() + + if (!appId || !appSecret) { + throw new Error('Feishu App ID and App Secret are required before scan authorization.') + } + + this.assertLoopbackFeishuAuthRedirectUri(redirectUri) + + const sessionKey = randomBytes(16).toString('hex') + const state = randomBytes(16).toString('hex') + const expiresAt = Date.now() + FEISHU_AUTH_SESSION_TTL_MS + const authUrl = buildFeishuAuthUrl( + { + brand, + appId, + appSecret, + redirectUri + }, + state + ) + let resolveResult!: (result: FeishuAuthResult) => void + const resultPromise = new Promise((resolve) => { + resolveResult = resolve + }) + const session: FeishuAuthSessionState = { + sessionKey, + state, + brand, + appId, + appSecret, + redirectUri, + authUrl, + expiresAt, + server: null, + window: null, + cleanupTimer: null, + abortController: new AbortController(), + resolve: resolveResult, + resultPromise, + completed: false + } + + 
this.feishuAuthSessions.set(sessionKey, session) + + try { + session.server = await this.startFeishuAuthCallbackServer(session) + session.cleanupTimer = setTimeout(() => { + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authTimeout' + }) + ) + this.feishuAuthSessions.delete(session.sessionKey) + }, FEISHU_AUTH_SESSION_TTL_MS) + this.openFeishuAuthWindow(session) + } catch (error) { + this.feishuAuthSessions.delete(sessionKey) + this.cleanupFeishuAuthSession(session) + throw error + } + + return { + sessionKey, + authUrl, + redirectUri, + expiresAt, + messageKey: 'settings.remote.feishu.authStarted' + } + } + + async waitForFeishuAuth(input: FeishuAuthWaitInput): Promise { + const sessionKey = input.sessionKey.trim() + if (!sessionKey) { + return createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authFailed' + }) + } + + const session = this.feishuAuthSessions.get(sessionKey) + if (!session) { + return createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authSessionMissing' + }) + } + + const timeoutMs = Math.min( + input.timeoutMs ?? 
FEISHU_AUTH_DEFAULT_WAIT_TIMEOUT_MS, + FEISHU_AUTH_DEFAULT_WAIT_TIMEOUT_MS + ) + const timeout = setTimeout(() => { + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authTimeout' + }) + ) + }, timeoutMs) + + try { + return await session.resultPromise + } finally { + clearTimeout(timeout) + if (session.completed) { + this.feishuAuthSessions.delete(sessionKey) + } + } + } + + async cancelFeishuAuth(sessionKey: string): Promise { + const session = this.feishuAuthSessions.get(sessionKey.trim()) + if (!session) { + return + } + + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authCancelled' + }) + ) + this.feishuAuthSessions.delete(session.sessionKey) + } + + async startFeishuInstall(input: FeishuInstallStartInput = {}): Promise { + this.pruneExpiredFeishuInstallSessions() + for (const existingSessionKey of Array.from(this.feishuInstallSessions.keys())) { + await this.cancelFeishuInstall(existingSessionKey) + } + + const requestedBrand = + input.brand === 'lark' ? 
'lark' : this.bindingStore.getFeishuConfig().brand + const abortController = new AbortController() + const registration = await startFeishuPersonalAgentRegistration(abortController.signal) + const sessionKey = randomBytes(16).toString('hex') + const expiresAt = Date.now() + registration.expireInSec * 1000 + const intervalMs = Math.max(registration.intervalSec * 1000, 3_000) + let resolveResult!: (result: FeishuInstallResult) => void + const resultPromise = new Promise((resolve) => { + resolveResult = resolve + }) + const session: FeishuInstallSessionState = { + sessionKey, + requestedBrand, + pollBrand: 'feishu', + deviceCode: registration.deviceCode, + installUrl: registration.installUrl, + userCode: registration.userCode, + expiresAt, + intervalMs, + cleanupTimer: null, + abortController, + resolve: resolveResult, + resultPromise, + completed: false, + polling: false + } + + this.feishuInstallSessions.set(sessionKey, session) + session.cleanupTimer = setTimeout( + () => { + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installTimeout' + }) + ) + this.feishuInstallSessions.delete(session.sessionKey) + }, + Math.max(expiresAt - Date.now(), 1_000) + ) + + return { + sessionKey, + installUrl: registration.installUrl, + userCode: registration.userCode, + expiresAt, + intervalMs, + messageKey: 'settings.remote.feishu.installStarted' + } + } + + async waitForFeishuInstall(input: FeishuInstallWaitInput): Promise { + const sessionKey = input.sessionKey.trim() + if (!sessionKey) { + return createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installFailed' + }) + } + + const session = this.feishuInstallSessions.get(sessionKey) + if (!session) { + return createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installSessionMissing' + }) + } + + if (!session.polling) { + session.polling = true + void 
this.pollFeishuInstallUntilComplete(session) + } + const timeoutMs = Math.min( + input.timeoutMs ?? FEISHU_INSTALL_DEFAULT_WAIT_TIMEOUT_MS, + FEISHU_INSTALL_DEFAULT_WAIT_TIMEOUT_MS + ) + const timeout = setTimeout(() => { + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installTimeout' + }) + ) + }, timeoutMs) + + try { + return await session.resultPromise + } finally { + clearTimeout(timeout) + if (session.completed) { + this.feishuInstallSessions.delete(sessionKey) + } + } + } + + async cancelFeishuInstall(sessionKey: string): Promise { + const session = this.feishuInstallSessions.get(sessionKey.trim()) + if (!session) { + return + } + + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installCancelled' + }) + ) + this.feishuInstallSessions.delete(session.sessionKey) + } + async getQQBotSettings(): Promise { const snapshot = this.buildQQBotSettingsSnapshot() const defaultAgentId = await this.sanitizeDefaultAgentId('qqbot', snapshot.defaultAgentId) @@ -1014,7 +1372,8 @@ export class RemoteControlPresenter { appId: settings.appId.trim(), appSecret: settings.appSecret.trim(), verificationToken: settings.verificationToken.trim(), - encryptKey: settings.encryptKey.trim() + encryptKey: settings.encryptKey.trim(), + enableStreamingCards: settings.enableStreamingCards }, configSignature ) @@ -1623,6 +1982,7 @@ export class RemoteControlPresenter { verificationToken: settings.verificationToken.trim(), encryptKey: settings.encryptKey.trim(), remoteEnabled: settings.remoteEnabled, + enableStreamingCards: settings.enableStreamingCards, defaultAgentId: settings.defaultAgentId.trim(), defaultWorkdir: settings.defaultWorkdir.trim() }) @@ -1667,6 +2027,477 @@ export class RemoteControlPresenter { }) } + private async pollFeishuInstallUntilComplete(session: FeishuInstallSessionState): Promise { + if 
(session.completed) { + return + } + + while (!session.completed && Date.now() < session.expiresAt) { + try { + const poll = await pollFeishuPersonalAgentRegistration( + session.pollBrand, + session.deviceCode, + session.abortController.signal + ) + if (session.completed) { + return + } + + const data = poll.data + const error = readFeishuRegistrationString(data, 'error') + if (error) { + if (error === 'authorization_pending' || error === 'slow_down') { + await this.delayFeishuInstallPoll(session.intervalMs) + continue + } + + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installFailed' + }) + ) + return + } + + const userInfo = asFeishuRegistrationRecord(data.user_info) + const tenantBrand = readFeishuRegistrationString(userInfo, 'tenant_brand') + const appSecret = readFeishuRegistrationString(data, 'client_secret') + if (session.pollBrand === 'feishu' && tenantBrand === 'lark' && !appSecret) { + session.pollBrand = 'lark' + continue + } + + if (!poll.ok) { + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installFailed' + }) + ) + return + } + + const appId = readFeishuRegistrationString(data, 'client_id') + if (appId && appSecret) { + if (session.completed) { + return + } + + const brand = session.pollBrand === 'lark' || tenantBrand === 'lark' ? 'lark' : 'feishu' + const openId = readFeishuRegistrationString(userInfo, 'open_id') + await this.enqueueRuntimeOperation(async () => { + if (session.completed) { + return + } + + this.bindingStore.updateFeishuConfig((config) => ({ + ...config, + brand, + appId, + appSecret, + verificationToken: '', + encryptKey: '', + pairedUserOpenIds: openId + ? 
Array.from(new Set([...config.pairedUserOpenIds, openId])).sort((left, right) => + left.localeCompare(right) + ) + : config.pairedUserOpenIds, + lastFatalError: null + })) + await this.rebuildFeishuRuntime() + }) + if (session.completed) { + return + } + + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: true, + brand, + appId, + openId, + messageKey: 'settings.remote.feishu.installSuccess' + }) + ) + return + } + } catch { + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installFailed' + }) + ) + return + } + + await this.delayFeishuInstallPoll(session.intervalMs) + } + + if (!session.completed) { + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installTimeout' + }) + ) + } + } + + private delayFeishuInstallPoll(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)) + } + + private completeFeishuInstallSession( + session: FeishuInstallSessionState, + result: FeishuInstallResult + ): void { + if (session.completed) { + return + } + + session.completed = true + this.cleanupFeishuInstallSession(session) + session.resolve(result) + } + + private cleanupFeishuInstallSession(session: FeishuInstallSessionState): void { + if (session.cleanupTimer) { + clearTimeout(session.cleanupTimer) + session.cleanupTimer = null + } + + if (!session.abortController.signal.aborted) { + session.abortController.abort() + } + } + + private pruneExpiredFeishuInstallSessions(): void { + const now = Date.now() + for (const session of this.feishuInstallSessions.values()) { + if (session.expiresAt > now) { + continue + } + + this.completeFeishuInstallSession( + session, + createFeishuInstallResult({ + installed: false, + messageKey: 'settings.remote.feishu.installTimeout' + }) + ) + this.feishuInstallSessions.delete(session.sessionKey) + } + } + + private 
assertLoopbackFeishuAuthRedirectUri(redirectUri: string): void { + let parsed: URL + try { + parsed = new URL(redirectUri) + } catch { + throw new Error('Feishu OAuth redirect URI must be a valid URL.') + } + + const isLoopback = parsed.hostname === '127.0.0.1' || parsed.hostname === 'localhost' + if (parsed.protocol !== 'http:' || !isLoopback) { + throw new Error('Feishu OAuth redirect URI must use http://127.0.0.1 or http://localhost.') + } + } + + private async startFeishuAuthCallbackServer( + session: FeishuAuthSessionState + ): Promise { + const redirect = new URL(session.redirectUri) + const port = Number.parseInt(redirect.port, 10) + if (!Number.isInteger(port) || port <= 0) { + throw new Error('Feishu OAuth redirect URI must include a loopback port.') + } + + const server = http.createServer((request, response) => { + void this.handleFeishuAuthCallback(session, request, response) + }) + + await new Promise((resolve, reject) => { + server.once('error', reject) + server.listen(port, redirect.hostname, () => { + server.off('error', reject) + resolve() + }) + }) + + return server + } + + private async handleFeishuAuthCallback( + session: FeishuAuthSessionState, + request: http.IncomingMessage, + response: http.ServerResponse + ): Promise { + const requestUrl = request.url ?? 
'/' + const redirect = new URL(session.redirectUri) + const callbackUrl = new URL(requestUrl, session.redirectUri) + + if (request.method !== 'GET') { + response.writeHead(405, { 'Content-Type': 'text/plain; charset=utf-8' }) + response.end('Method Not Allowed') + return + } + + const expectedHost = `${redirect.hostname}:${redirect.port}` + const actualHost = request.headers.host?.trim().toLowerCase() + if (actualHost !== expectedHost.toLowerCase()) { + response.writeHead(400, { 'Content-Type': 'text/plain; charset=utf-8' }) + response.end('Bad Request') + return + } + + if (callbackUrl.pathname !== redirect.pathname) { + response.writeHead(404, { 'Content-Type': 'text/plain; charset=utf-8' }) + response.end('Not Found') + return + } + + const error = callbackUrl.searchParams.get('error') + const code = callbackUrl.searchParams.get('code') + const returnedState = callbackUrl.searchParams.get('state') + + if (session.completed) { + this.writeFeishuAuthCallbackPage(response, false) + return + } + + if (returnedState !== session.state) { + this.writeFeishuAuthCallbackPage(response, false) + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authStateMismatch' + }) + ) + return + } + + if (error) { + this.writeFeishuAuthCallbackPage(response, false) + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authDenied' + }) + ) + return + } + + if (!code) { + this.writeFeishuAuthCallbackPage(response, false) + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authMissingCode' + }) + ) + return + } + + try { + const accessToken = await exchangeFeishuOAuthCode( + { + brand: session.brand, + appId: session.appId, + appSecret: session.appSecret, + redirectUri: session.redirectUri + }, + code, + session.abortController.signal + ) + if 
(session.completed) { + this.writeFeishuAuthCallbackPage(response, false) + return + } + + const userInfo = await fetchFeishuOAuthUserInfo( + session.brand, + accessToken, + session.abortController.signal + ) + if (session.completed) { + this.writeFeishuAuthCallbackPage(response, false) + return + } + + this.bindingStore.addFeishuPairedUser(userInfo.openId) + this.writeFeishuAuthCallbackPage(response, true) + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: true, + openId: userInfo.openId, + unionId: userInfo.unionId, + name: userInfo.name, + messageKey: 'settings.remote.feishu.authSuccess' + }) + ) + } catch { + this.writeFeishuAuthCallbackPage(response, false) + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authFailed' + }) + ) + } + } + + private writeFeishuAuthCallbackPage(response: http.ServerResponse, success: boolean): void { + response.writeHead(success ? 200 : 400, { 'Content-Type': 'text/html; charset=utf-8' }) + response.end(` + + + + DeepChat Feishu Authorization + + +

${success ? 'Authorization complete' : 'Authorization failed'}

+

${success ? 'You can close this window and return to DeepChat.' : 'Return to DeepChat and try again.'}

+ +`) + } + + private completeFeishuAuthSession( + session: FeishuAuthSessionState, + result: FeishuAuthResult + ): void { + if (session.completed) { + return + } + + session.completed = true + this.cleanupFeishuAuthSession(session) + session.resolve(result) + } + + private cleanupFeishuAuthSession(session: FeishuAuthSessionState): void { + if (session.cleanupTimer) { + clearTimeout(session.cleanupTimer) + session.cleanupTimer = null + } + + if (!session.abortController.signal.aborted) { + session.abortController.abort() + } + + if (session.server) { + session.server.close() + session.server = null + } + + if (session.window && !session.window.isDestroyed()) { + session.window.close() + } + session.window = null + } + + private pruneExpiredFeishuAuthSessions(): void { + const now = Date.now() + for (const session of this.feishuAuthSessions.values()) { + if (session.expiresAt > now) { + continue + } + + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authTimeout' + }) + ) + this.feishuAuthSessions.delete(session.sessionKey) + } + } + + private isAllowedFeishuAuthNavigation( + url: string, + brand: 'feishu' | 'lark', + redirectUri: string + ): boolean { + try { + const parsed = new URL(url) + const redirect = new URL(redirectUri) + const domains = resolveFeishuAuthDomains(brand) + const accountsHost = new URL(domains.accountsBaseUrl).host + const openHost = new URL(domains.openBaseUrl).host + const isAuthHost = + parsed.protocol === 'https:' && (parsed.host === accountsHost || parsed.host === openHost) + const isRedirectHost = + parsed.protocol === redirect.protocol && + parsed.host === redirect.host && + parsed.pathname === redirect.pathname + return isAuthHost || isRedirectHost + } catch { + return false + } + } + + private openFeishuAuthWindow(session: FeishuAuthSessionState): void { + const parentWindow = + this.deps.windowPresenter.getFocusedWindow() ?? 
this.deps.windowPresenter.getAllWindows()[0] + const loginWindow = new BrowserWindow({ + width: 480, + height: 760, + minWidth: 420, + minHeight: 680, + autoHideMenuBar: true, + title: 'Feishu / Lark Authorization', + ...(parentWindow ? { parent: parentWindow } : {}), + webPreferences: { + nodeIntegration: false, + contextIsolation: true, + webSecurity: true + } + }) + + loginWindow.webContents.on('will-navigate', (event, url) => { + if (!this.isAllowedFeishuAuthNavigation(url, session.brand, session.redirectUri)) { + event.preventDefault() + } + }) + loginWindow.webContents.setWindowOpenHandler(({ url }) => { + if (this.isAllowedFeishuAuthNavigation(url, session.brand, session.redirectUri)) { + void loginWindow.loadURL(url) + } + return { action: 'deny' } + }) + loginWindow.on('closed', () => { + if (session.window === loginWindow && !session.completed) { + this.completeFeishuAuthSession( + session, + createFeishuAuthResult({ + authorized: false, + messageKey: 'settings.remote.feishu.authCancelled' + }) + ) + } + if (session.window === loginWindow) { + session.window = null + } + }) + + void loginWindow.loadURL(session.authUrl) + loginWindow.show() + loginWindow.focus() + session.window = loginWindow + } + private openWeixinIlinkLoginWindow(loginUrl: string | null | undefined): void { const normalizedLoginUrl = loginUrl?.trim() if (!normalizedLoginUrl) { diff --git a/src/main/presenter/remoteControlPresenter/types.ts b/src/main/presenter/remoteControlPresenter/types.ts index 6d317a015..c77310dc8 100644 --- a/src/main/presenter/remoteControlPresenter/types.ts +++ b/src/main/presenter/remoteControlPresenter/types.ts @@ -309,6 +309,7 @@ export interface FeishuRemoteRuntimeConfig { verificationToken: string encryptKey: string enabled: boolean + enableStreamingCards: boolean defaultAgentId: string defaultWorkdir: string pairedUserOpenIds: string[] @@ -933,6 +934,7 @@ export const createDefaultRemoteControlConfig = (): RemoteControlConfig => ({ verificationToken: '', 
encryptKey: '', enabled: false, + enableStreamingCards: false, defaultAgentId: FEISHU_REMOTE_DEFAULT_AGENT_ID, defaultWorkdir: '', pairedUserOpenIds: [], @@ -982,122 +984,103 @@ export const createDefaultRemoteControlConfig = (): RemoteControlConfig => ({ } }) -const RemoteEndpointBindingMetaSchema = z - .object({ - channel: z.enum(['telegram', 'feishu', 'qqbot', 'discord', 'weixin-ilink']).optional(), - kind: z.enum(['dm', 'group', 'topic']).optional(), - chatId: z.string().optional(), - threadId: z.string().nullable().optional() - }) - .strip() +const RemoteEndpointBindingMetaSchema = z.object({ + channel: z.enum(['telegram', 'feishu', 'qqbot', 'discord', 'weixin-ilink']).optional(), + kind: z.enum(['dm', 'group', 'topic']).optional(), + chatId: z.string().optional(), + threadId: z.string().nullable().optional() +}) -const RemoteEndpointBindingSchema = z - .object({ - sessionId: z.string().min(1), - updatedAt: z.number().int().nonnegative().optional(), - meta: RemoteEndpointBindingMetaSchema.optional() - }) - .strip() +const RemoteEndpointBindingSchema = z.object({ + sessionId: z.string().min(1), + updatedAt: z.number().int().nonnegative().optional(), + meta: RemoteEndpointBindingMetaSchema.optional() +}) -const PairingStateSchema = z - .object({ - code: z.string().nullable().optional(), - expiresAt: z.number().int().nonnegative().nullable().optional(), - failedAttempts: z.number().int().nonnegative().optional() - }) - .strip() - -const TelegramRemoteRuntimeConfigSchema = z - .object({ - botToken: z.string().optional(), - enabled: z.boolean().optional(), - allowlist: z.array(z.union([z.number(), z.string()])).optional(), - defaultAgentId: z.string().optional(), - defaultWorkdir: z.string().optional(), - streamMode: z.enum(['draft', 'final']).optional(), - pollOffset: z.number().int().nonnegative().optional(), - lastFatalError: z.string().nullable().optional(), - pairing: PairingStateSchema.optional(), - bindings: z.record(z.string(), z.unknown()).optional() - }) 
- .strip() - -const FeishuRemoteRuntimeConfigSchema = z - .object({ - brand: z.enum(['feishu', 'lark']).optional(), - appId: z.string().optional(), - appSecret: z.string().optional(), - verificationToken: z.string().optional(), - encryptKey: z.string().optional(), - enabled: z.boolean().optional(), - defaultAgentId: z.string().optional(), - defaultWorkdir: z.string().optional(), - pairedUserOpenIds: z.array(z.string()).optional(), - lastFatalError: z.string().nullable().optional(), - pairing: PairingStateSchema.optional(), - bindings: z.record(z.string(), z.unknown()).optional() - }) - .strip() - -const QQBotRemoteRuntimeConfigSchema = z - .object({ - appId: z.string().optional(), - clientSecret: z.string().optional(), - enabled: z.boolean().optional(), - defaultAgentId: z.string().optional(), - defaultWorkdir: z.string().optional(), - pairedUserIds: z.array(z.union([z.string(), z.number()])).optional(), - pairedGroupIds: z.array(z.union([z.string(), z.number()])).optional(), - lastFatalError: z.string().nullable().optional(), - pairing: PairingStateSchema.optional(), - bindings: z.record(z.string(), z.unknown()).optional() - }) - .strip() - -const DiscordRemoteRuntimeConfigSchema = z - .object({ - botToken: z.string().optional(), - enabled: z.boolean().optional(), - defaultAgentId: z.string().optional(), - defaultWorkdir: z.string().optional(), - pairedChannelIds: z.array(z.union([z.string(), z.number()])).optional(), - lastFatalError: z.string().nullable().optional(), - pairing: PairingStateSchema.optional(), - bindings: z.record(z.string(), z.unknown()).optional() - }) - .strip() - -const WeixinIlinkAccountRuntimeConfigSchema = z - .object({ - accountId: z.string().optional(), - ownerUserId: z.string().optional(), - baseUrl: z.string().optional(), - botToken: z.string().optional(), - enabled: z.boolean().optional(), - syncCursor: z.string().optional(), - lastFatalError: z.string().nullable().optional(), - bindings: z.record(z.string(), z.unknown()).optional() - 
}) - .strip() - -const WeixinIlinkRemoteRuntimeConfigSchema = z - .object({ - enabled: z.boolean().optional(), - defaultAgentId: z.string().optional(), - defaultWorkdir: z.string().optional(), - accounts: z.array(WeixinIlinkAccountRuntimeConfigSchema).optional() - }) - .strip() - -const RemoteControlConfigSchema = z - .object({ - telegram: TelegramRemoteRuntimeConfigSchema.optional(), - feishu: FeishuRemoteRuntimeConfigSchema.optional(), - qqbot: QQBotRemoteRuntimeConfigSchema.optional(), - discord: DiscordRemoteRuntimeConfigSchema.optional(), - weixinIlink: WeixinIlinkRemoteRuntimeConfigSchema.optional() - }) - .strip() +const PairingStateSchema = z.object({ + code: z.string().nullable().optional(), + expiresAt: z.number().int().nonnegative().nullable().optional(), + failedAttempts: z.number().int().nonnegative().optional() +}) + +const TelegramRemoteRuntimeConfigSchema = z.object({ + botToken: z.string().optional(), + enabled: z.boolean().optional(), + allowlist: z.array(z.union([z.number(), z.string()])).optional(), + defaultAgentId: z.string().optional(), + defaultWorkdir: z.string().optional(), + streamMode: z.enum(['draft', 'final']).optional(), + pollOffset: z.number().int().nonnegative().optional(), + lastFatalError: z.string().nullable().optional(), + pairing: PairingStateSchema.optional(), + bindings: z.record(z.string(), z.unknown()).optional() +}) + +const FeishuRemoteRuntimeConfigSchema = z.object({ + brand: z.enum(['feishu', 'lark']).optional(), + appId: z.string().optional(), + appSecret: z.string().optional(), + verificationToken: z.string().optional(), + encryptKey: z.string().optional(), + enabled: z.boolean().optional(), + enableStreamingCards: z.boolean().optional(), + defaultAgentId: z.string().optional(), + defaultWorkdir: z.string().optional(), + pairedUserOpenIds: z.array(z.string()).optional(), + lastFatalError: z.string().nullable().optional(), + pairing: PairingStateSchema.optional(), + bindings: z.record(z.string(), 
z.unknown()).optional() +}) + +const QQBotRemoteRuntimeConfigSchema = z.object({ + appId: z.string().optional(), + clientSecret: z.string().optional(), + enabled: z.boolean().optional(), + defaultAgentId: z.string().optional(), + defaultWorkdir: z.string().optional(), + pairedUserIds: z.array(z.union([z.string(), z.number()])).optional(), + pairedGroupIds: z.array(z.union([z.string(), z.number()])).optional(), + lastFatalError: z.string().nullable().optional(), + pairing: PairingStateSchema.optional(), + bindings: z.record(z.string(), z.unknown()).optional() +}) + +const DiscordRemoteRuntimeConfigSchema = z.object({ + botToken: z.string().optional(), + enabled: z.boolean().optional(), + defaultAgentId: z.string().optional(), + defaultWorkdir: z.string().optional(), + pairedChannelIds: z.array(z.union([z.string(), z.number()])).optional(), + lastFatalError: z.string().nullable().optional(), + pairing: PairingStateSchema.optional(), + bindings: z.record(z.string(), z.unknown()).optional() +}) + +const WeixinIlinkAccountRuntimeConfigSchema = z.object({ + accountId: z.string().optional(), + ownerUserId: z.string().optional(), + baseUrl: z.string().optional(), + botToken: z.string().optional(), + enabled: z.boolean().optional(), + syncCursor: z.string().optional(), + lastFatalError: z.string().nullable().optional(), + bindings: z.record(z.string(), z.unknown()).optional() +}) + +const WeixinIlinkRemoteRuntimeConfigSchema = z.object({ + enabled: z.boolean().optional(), + defaultAgentId: z.string().optional(), + defaultWorkdir: z.string().optional(), + accounts: z.array(WeixinIlinkAccountRuntimeConfigSchema).optional() +}) + +const RemoteControlConfigSchema = z.object({ + telegram: TelegramRemoteRuntimeConfigSchema.optional(), + feishu: FeishuRemoteRuntimeConfigSchema.optional(), + qqbot: QQBotRemoteRuntimeConfigSchema.optional(), + discord: DiscordRemoteRuntimeConfigSchema.optional(), + weixinIlink: WeixinIlinkRemoteRuntimeConfigSchema.optional() +}) type 
LegacyTelegramRemoteConfig = z.infer type LegacyFeishuRemoteConfig = z.infer @@ -1127,7 +1110,7 @@ const extractLegacyTelegramConfig = (input: unknown): LegacyTelegramRemoteConfig const record = input as Record if ( - !hasAnyOwn(record, ['allowlist', 'streamMode', 'pollOffset', 'lastFatalError']) && + !hasAnyOwn(record, ['botToken', 'allowlist', 'streamMode', 'pollOffset', 'lastFatalError']) && !hasBindingPrefix(record, 'telegram:') ) { return null @@ -1149,6 +1132,7 @@ const extractLegacyFeishuConfig = (input: unknown): LegacyFeishuRemoteConfig | n 'appSecret', 'verificationToken', 'encryptKey', + 'enableStreamingCards', 'pairedUserOpenIds', 'lastFatalError' ]) && @@ -1367,6 +1351,9 @@ const normalizeBindings = ( return bindings } +const resolveRemoteEnabled = (enabled: boolean | undefined, configured: boolean): boolean => + typeof enabled === 'boolean' ? enabled : configured + export const normalizeRemoteControlConfig = (input: unknown): RemoteControlConfig => { const defaults = createDefaultRemoteControlConfig() const parsed = RemoteControlConfigSchema.safeParse(input) @@ -1379,11 +1366,12 @@ export const normalizeRemoteControlConfig = (input: unknown): RemoteControlConfi const qqbot = parsed.data.qqbot ?? extractLegacyQQBotConfig(input) ?? {} const discord = parsed.data.discord ?? extractLegacyDiscordConfig(input) ?? {} const weixinIlink = parsed.data.weixinIlink ?? extractLegacyWeixinIlinkConfig(input) ?? {} + const weixinIlinkAccounts = normalizeWeixinIlinkRuntimeAccounts(weixinIlink.accounts) return { telegram: { botToken: telegram.botToken?.trim() || '', - enabled: Boolean(telegram.enabled), + enabled: resolveRemoteEnabled(telegram.enabled, Boolean(telegram.botToken?.trim())), allowlist: normalizeTelegramUserIds(telegram.allowlist), streamMode: telegram.streamMode === 'final' ? 
'final' : defaults.telegram.streamMode, defaultAgentId: telegram.defaultAgentId?.trim() || defaults.telegram.defaultAgentId, @@ -1411,7 +1399,11 @@ export const normalizeRemoteControlConfig = (input: unknown): RemoteControlConfi appSecret: feishu.appSecret?.trim() || '', verificationToken: feishu.verificationToken?.trim() || '', encryptKey: feishu.encryptKey?.trim() || '', - enabled: Boolean(feishu.enabled), + enabled: resolveRemoteEnabled( + feishu.enabled, + Boolean(feishu.appId?.trim() && feishu.appSecret?.trim()) + ), + enableStreamingCards: Boolean(feishu.enableStreamingCards), defaultAgentId: feishu.defaultAgentId?.trim() || defaults.feishu.defaultAgentId, defaultWorkdir: feishu.defaultWorkdir?.trim() || '', pairedUserOpenIds: normalizeFeishuOpenIds(feishu.pairedUserOpenIds), @@ -1429,7 +1421,10 @@ export const normalizeRemoteControlConfig = (input: unknown): RemoteControlConfi qqbot: { appId: qqbot.appId?.trim() || '', clientSecret: qqbot.clientSecret?.trim() || '', - enabled: Boolean(qqbot.enabled), + enabled: resolveRemoteEnabled( + qqbot.enabled, + Boolean(qqbot.appId?.trim() && qqbot.clientSecret?.trim()) + ), defaultAgentId: qqbot.defaultAgentId?.trim() || defaults.qqbot.defaultAgentId, defaultWorkdir: qqbot.defaultWorkdir?.trim() || '', pairedUserIds: normalizeQQBotUserIds(qqbot.pairedUserIds), @@ -1447,7 +1442,7 @@ export const normalizeRemoteControlConfig = (input: unknown): RemoteControlConfi }, discord: { botToken: discord.botToken?.trim() || '', - enabled: Boolean(discord.enabled), + enabled: resolveRemoteEnabled(discord.enabled, Boolean(discord.botToken?.trim())), defaultAgentId: discord.defaultAgentId?.trim() || defaults.discord.defaultAgentId, defaultWorkdir: discord.defaultWorkdir?.trim() || '', pairedChannelIds: normalizeDiscordChannelIds(discord.pairedChannelIds), @@ -1464,10 +1459,10 @@ export const normalizeRemoteControlConfig = (input: unknown): RemoteControlConfi bindings: normalizeBindings(discord.bindings, 'discord') }, weixinIlink: { 
- enabled: Boolean(weixinIlink.enabled), + enabled: resolveRemoteEnabled(weixinIlink.enabled, weixinIlinkAccounts.length > 0), defaultAgentId: weixinIlink.defaultAgentId?.trim() || defaults.weixinIlink.defaultAgentId, defaultWorkdir: weixinIlink.defaultWorkdir?.trim() || '', - accounts: normalizeWeixinIlinkRuntimeAccounts(weixinIlink.accounts) + accounts: weixinIlinkAccounts } } } @@ -1721,6 +1716,7 @@ export const normalizeFeishuSettingsInput = ( verificationToken: input.verificationToken?.trim() ?? '', encryptKey: input.encryptKey?.trim() ?? '', remoteEnabled: Boolean(input.remoteEnabled), + enableStreamingCards: Boolean(input.enableStreamingCards), defaultAgentId: input.defaultAgentId?.trim() || FEISHU_REMOTE_DEFAULT_AGENT_ID, defaultWorkdir: input.defaultWorkdir?.trim() ?? '', pairedUserOpenIds: normalizeFeishuOpenIds(input.pairedUserOpenIds) diff --git a/src/main/presenter/remoteControlPresenter/types/channel.ts b/src/main/presenter/remoteControlPresenter/types/channel.ts index f5155c472..7e8a9fcaf 100644 --- a/src/main/presenter/remoteControlPresenter/types/channel.ts +++ b/src/main/presenter/remoteControlPresenter/types/channel.ts @@ -129,17 +129,15 @@ const relativePathSchema = z 'must be a relative path inside the plugin package' ) -const ChannelPluginManifestSchema = z - .object({ - schemaVersion: z.literal(CHANNEL_PLUGIN_SCHEMA_VERSION), - pluginId: z.string().regex(CHANNEL_PLUGIN_ID_PATTERN), - apiVersion: z.literal(CHANNEL_PLUGIN_API_VERSION), - entry: relativePathSchema, - types: relativePathSchema, - channelType: z.string().regex(CHANNEL_TYPE_PATTERN), - configSchema: relativePathSchema.optional() - }) - .strip() +const ChannelPluginManifestSchema = z.object({ + schemaVersion: z.literal(CHANNEL_PLUGIN_SCHEMA_VERSION), + pluginId: z.string().regex(CHANNEL_PLUGIN_ID_PATTERN), + apiVersion: z.literal(CHANNEL_PLUGIN_API_VERSION), + entry: relativePathSchema, + types: relativePathSchema, + channelType: z.string().regex(CHANNEL_TYPE_PATTERN), + 
configSchema: relativePathSchema.optional() +}) export const parseChannelPluginManifest = (input: unknown): ChannelPluginManifest => ChannelPluginManifestSchema.parse(input) diff --git a/src/main/presenter/scheduledTasks/normalize.ts b/src/main/presenter/scheduledTasks/normalize.ts index b4ad15951..19a68100b 100644 --- a/src/main/presenter/scheduledTasks/normalize.ts +++ b/src/main/presenter/scheduledTasks/normalize.ts @@ -53,12 +53,10 @@ const ScheduledTaskSchema = z.object({ lastFiredAt: z.number().int().nonnegative().nullable() }) -const LooseSchedulerSettingsSchema = z - .object({ - version: z.unknown().optional(), - tasks: z.array(z.unknown()).optional() - }) - .strip() +const LooseSchedulerSettingsSchema = z.object({ + version: z.unknown().optional(), + tasks: z.array(z.unknown()).optional() +}) const sanitizeTrigger = (input: unknown): ScheduledTaskTrigger | null => { const parsed = TriggerSchema.safeParse(input) diff --git a/src/main/presenter/skillPresenter/index.ts b/src/main/presenter/skillPresenter/index.ts index d3d7fdb73..d143893fe 100644 --- a/src/main/presenter/skillPresenter/index.ts +++ b/src/main/presenter/skillPresenter/index.ts @@ -1,7 +1,9 @@ import { app, shell } from 'electron' import path from 'path' import fs from 'fs' +import { execFile } from 'node:child_process' import { randomUUID } from 'node:crypto' +import { promisify } from 'node:util' import matter from 'gray-matter' import { unzipSync } from 'fflate' import type { IConfigPresenter } from '@shared/presenter' @@ -20,7 +22,18 @@ import { SkillInstallResult, SkillFolderNode, SkillInstallOptions, + GitSkillInstallInput, + GitSkillRepoScanItem, + GitSkillRepoScanResult, + SkillAdoptionRegistration, + SkillAgentLinkRegistration, SkillExtensionConfig, + SkillSyncDirectoryExportInput, + SkillSyncDirectoryExportPreview, + SkillSyncDirectoryImportInput, + SkillSyncDirectoryImportPreview, + SkillSyncDirectoryPreviewItem, + SkillSyncDirectoryResult, SkillManageRequest, SkillManageResult, 
SkillDraftActionResult, @@ -30,11 +43,21 @@ import { SkillViewResult, SkillLinkedFile } from '@shared/types/skill' +import type { + SkillManagementItem, + SkillManagementState, + SkillSyncDirectoryConfig, + SkillSource, + SkillSourceType, + UnifiedSkillItem +} from '@shared/types/skillManagement' import { publishDeepchatEvent } from '@/routes/publishDeepchatEvent' import logger from '@shared/logger' import { normalizeSkillAllowedTools } from './toolNameMapping' import { discoverSkillMetadataInWorker, logSkillDiscoveryWorkerWarnings } from './discoveryWorker' +const execFileAsync = promisify(execFile) + /** * Skill system configuration constants */ @@ -112,6 +135,7 @@ const DRAFT_ALLOWED_TOP_LEVEL_DIRS = new Set(['references', 'templates', 'script const DRAFT_CONVERSATION_ID_PATTERN = /^[A-Za-z0-9._-]+$/ const DRAFT_ID_PATTERN = /^[A-Za-z0-9._-]+$/ const DRAFT_ACTIVITY_MARKER = '.lastActivity' +const SKILL_MANAGEMENT_STATE_KEY = 'skills.managementState' const DRAFT_INJECTION_PATTERNS = [ /ignore\s+previous\s+instructions/i, /disregard\s+all\s+prior/i, @@ -282,9 +306,6 @@ export class SkillPresenter implements ISkillPresenter { if (!fs.existsSync(this.skillsDir)) { fs.mkdirSync(this.skillsDir, { recursive: true }) } - if (!fs.existsSync(this.sidecarDir)) { - fs.mkdirSync(this.sidecarDir, { recursive: true }) - } } /** @@ -482,7 +503,181 @@ export class SkillPresenter implements ISkillPresenter { } private isSkillVisible(metadata: SkillMetadata): boolean { - return Boolean(metadata) + return Boolean(metadata) && !this.isSkillDeepChatDisabled(metadata.name) + } + + private createDefaultManagementState(): SkillManagementState { + return { + version: 1, + skills: {} + } + } + + private getStoredManagementState(): SkillManagementState { + const stored = this.configPresenter.getSetting(SKILL_MANAGEMENT_STATE_KEY) + if (!stored || typeof stored !== 'object') { + return this.createDefaultManagementState() + } + + const candidate = stored as Partial + const skills: Record = 
{} + for (const [name, item] of Object.entries(candidate.skills ?? {})) { + if (!this.isSafeSkillName(name) || !item || typeof item !== 'object') { + continue + } + const raw = item as Partial + skills[name] = { + name, + canonicalPath: + typeof raw.canonicalPath === 'string' && raw.canonicalPath.trim() + ? raw.canonicalPath + : path.join(this.skillsDir, name), + deepchat: { + disabled: raw.deepchat?.disabled === true + }, + extension: sanitizeSkillExtensionConfig(raw.extension), + source: this.sanitizeSkillSource(raw.source), + agentLinks: + raw.agentLinks && typeof raw.agentLinks === 'object' + ? (raw.agentLinks as SkillManagementItem['agentLinks']) + : undefined + } + } + + return { + version: 1, + skills, + sync: this.sanitizeSyncDirectoryConfig(candidate.sync) + } + } + + private sanitizeSyncDirectoryConfig(value: unknown): SkillSyncDirectoryConfig | undefined { + const raw = + value && typeof value === 'object' ? (value as Partial) : {} + if (typeof raw.skillsDirectory !== 'string' || !raw.skillsDirectory.trim()) { + return undefined + } + + return { + skillsDirectory: path.resolve(raw.skillsDirectory), + layout: 'multi-skill-repo', + lastExportAt: typeof raw.lastExportAt === 'string' ? raw.lastExportAt : null, + lastImportAt: typeof raw.lastImportAt === 'string' ? raw.lastImportAt : null + } + } + + private saveManagementState(state: SkillManagementState): void { + this.configPresenter.setSetting(SKILL_MANAGEMENT_STATE_KEY, state) + } + + private sanitizeSkillSource(value: unknown): SkillSource { + const raw = value && typeof value === 'object' ? 
(value as Partial) : {} + const source: SkillSource = { + type: this.normalizeSkillSourceType(raw.type) + } + if (typeof raw.repoUrl === 'string') source.repoUrl = raw.repoUrl + if (raw.repoFormat === 'single-skill' || raw.repoFormat === 'multi-skill') { + source.repoFormat = raw.repoFormat + } + if (typeof raw.agentId === 'string') source.agentId = raw.agentId + if (typeof raw.originalPath === 'string') source.originalPath = raw.originalPath + if (typeof raw.importedFrom === 'string') source.importedFrom = raw.importedFrom + if (typeof raw.installedAt === 'string') source.installedAt = raw.installedAt + if (typeof raw.importedAt === 'string') source.importedAt = raw.importedAt + if (typeof raw.adoptedAt === 'string') source.adoptedAt = raw.adoptedAt + return source + } + + private normalizeSkillSourceType(value: unknown): SkillSourceType { + const allowed: SkillSourceType[] = [ + 'builtin', + 'created', + 'folder-install', + 'zip-install', + 'url-install', + 'git-install', + 'adopted', + 'imported' + ] + return typeof value === 'string' && allowed.includes(value as SkillSourceType) + ? (value as SkillSourceType) + : 'created' + } + + private createDefaultManagementItem(name: string): SkillManagementItem { + return { + name, + canonicalPath: path.join(this.skillsDir, name), + deepchat: { + disabled: false + }, + extension: createDefaultSkillExtensionConfig(), + source: { + type: 'created' + } + } + } + + private updateSkillManagementItem( + name: string, + updater: (item: SkillManagementItem) => SkillManagementItem + ): SkillManagementItem { + const state = this.getStoredManagementState() + const nextItem = updater(state.skills[name] ?? 
this.createDefaultManagementItem(name)) + state.skills[name] = nextItem + this.saveManagementState(state) + return nextItem + } + + private isSkillDeepChatDisabled(name: string): boolean { + return this.getStoredManagementState().skills[name]?.deepchat.disabled === true + } + + async getSkillManagementState(): Promise { + return this.getStoredManagementState() + } + + async setSkillDeepChatDisabled(name: string, disabled: boolean): Promise { + if (this.metadataCache.size === 0) { + await this.discoverSkills() + } + if (!this.metadataCache.has(name)) { + throw new Error(`Skill "${name}" not found`) + } + + this.updateSkillManagementItem(name, (item) => ({ + ...item, + canonicalPath: this.metadataCache.get(name)?.skillRoot ?? item.canonicalPath, + deepchat: { + ...item.deepchat, + disabled + } + })) + this.contentCache.delete(name) + publishDeepchatEvent('skills.catalog.changed', { + reason: 'disabled-updated', + name, + version: Date.now() + }) + } + + async getUnifiedSkillCatalog(): Promise { + if (this.metadataCache.size === 0) { + await this.discoverSkills() + } + + const state = this.getStoredManagementState() + return this.sortSkillMetadata(Array.from(this.metadataCache.values())).map((skill) => { + const item = state.skills[skill.name] ?? this.createDefaultManagementItem(skill.name) + return { + ...skill, + canonicalPath: item.canonicalPath || skill.skillRoot, + sourceType: item.source.type, + deepchatDisabled: item.deepchat.disabled, + agentLinks: item.agentLinks ?? 
{}, + mutable: !skill.ownerPluginId + } + }) } private sortSkillMetadata(skills: SkillMetadata[]): SkillMetadata[] { @@ -678,16 +873,6 @@ export class SkillPresenter implements ISkillPresenter { const rawContent = await fs.promises.readFile(metadata.path, 'utf-8') const { content } = matter(rawContent) - let nextIsPinned = isPinned - - if (options?.conversationId && !isPinned) { - const updatedSkills = await this.setActiveSkills(options.conversationId, [ - ...pinnedSkills, - metadata.name - ]) - nextIsPinned = updatedSkills.includes(metadata.name) - } - return { success: true, name: metadata.name, @@ -698,7 +883,7 @@ export class SkillPresenter implements ISkillPresenter { platforms: metadata.platforms, metadata: metadata.metadata, linkedFiles: await this.listSkillLinkedFiles(metadata.skillRoot), - isPinned: nextIsPinned + isPinned } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error) @@ -1080,7 +1265,7 @@ export class SkillPresenter implements ISkillPresenter { continue } - const result = await this.installFromDirectory(skillDir, { overwrite: false }) + const result = await this.installFromDirectory(skillDir, { overwrite: false }, 'builtin') if (!result.success && result.error?.includes('already exists')) { continue } @@ -1140,7 +1325,7 @@ export class SkillPresenter implements ISkillPresenter { folderPath: string, options?: SkillInstallOptions ): Promise { - return this.installFromDirectory(folderPath, options) + return this.installFromDirectory(folderPath, options, 'folder-install') } /** @@ -1161,7 +1346,7 @@ export class SkillPresenter implements ISkillPresenter { if (!skillDir) { return { success: false, error: 'SKILL.md not found in zip archive' } } - return await this.installFromDirectory(skillDir, options) + return await this.installFromDirectory(skillDir, options, 'zip-install') } catch (error) { const errorMsg = error instanceof Error ? 
error.message : String(error) return { success: false, error: errorMsg, errorCode: 'io_error' } @@ -1177,7 +1362,17 @@ export class SkillPresenter implements ISkillPresenter { const tempZipPath = path.join(app.getPath('temp'), `deepchat-skill-${Date.now()}.zip`) try { await this.downloadSkillZip(url, tempZipPath) - return await this.installFromZip(tempZipPath, options) + const result = await this.installFromZip(tempZipPath, options) + if (result.success && result.skillName) { + this.updateSkillManagementItem(result.skillName, (item) => ({ + ...item, + source: { + type: 'url-install', + installedAt: new Date().toISOString() + } + })) + } + return result } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error) return { success: false, error: errorMsg, errorCode: 'io_error' } @@ -1188,6 +1383,277 @@ export class SkillPresenter implements ISkillPresenter { } } + async scanGitSkillRepo(repoUrl: string): Promise { + const normalizedRepoUrl = repoUrl.trim() + if (!normalizedRepoUrl) { + throw new Error('Git repository URL is required') + } + + const cloneDir = await this.cloneGitSkillRepo(normalizedRepoUrl) + try { + return await this.scanGitSkillRepoDirectory(normalizedRepoUrl, cloneDir) + } finally { + fs.rmSync(cloneDir, { recursive: true, force: true }) + } + } + + async installSkillsFromGit(input: GitSkillInstallInput): Promise { + const repoUrl = input.repoUrl.trim() + const selected = new Set(input.skillNames) + const strategy = input.strategy ?? 'rename' + if (!repoUrl || selected.size === 0) { + return [] + } + + const cloneDir = await this.cloneGitSkillRepo(repoUrl) + try { + const scan = await this.scanGitSkillRepoDirectory(repoUrl, cloneDir) + const selectedItems = scan.skills.filter((item) => selected.has(item.name)) + const results: SkillInstallResult[] = [] + + for (const item of selectedItems) { + if (!item.valid) { + results.push({ + success: false, + skillName: item.name, + error: item.error ?? 
'Invalid skill', + errorCode: 'invalid_skill' + }) + continue + } + + if (item.conflict && strategy === 'skip') { + results.push({ + success: false, + skillName: item.name, + existingSkillName: item.name, + error: `Skill "${item.name}" already exists`, + errorCode: 'conflict' + }) + continue + } + + const sourceDir = + scan.repoFormat === 'single-skill' + ? cloneDir + : path.join(cloneDir, item.relativePath.replace(/\/SKILL\.md$/, '')) + const targetName = + item.conflict && strategy === 'rename' ? this.createUniqueSkillName(item.name) : item.name + const result = await this.installFromDirectory( + sourceDir, + { overwrite: item.conflict && strategy === 'overwrite' }, + 'git-install', + { + repoUrl, + repoFormat: scan.repoFormat, + installedAt: new Date().toISOString() + }, + targetName + ) + results.push(result) + } + + if (results.some((result) => result.success)) { + publishDeepchatEvent('skills.catalog.changed', { + reason: 'git-installed', + version: Date.now() + }) + } + + return results + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error) + return [{ success: false, error: errorMsg, errorCode: 'io_error' }] + } finally { + fs.rmSync(cloneDir, { recursive: true, force: true }) + } + } + + async getSkillsSyncConfig(): Promise { + return this.getStoredManagementState().sync ?? 
null + } + + async setSkillsSyncDirectory(input: { + skillsDirectory: string + }): Promise { + const skillsDirectory = path.resolve(input.skillsDirectory.trim()) + const config: SkillSyncDirectoryConfig = { + skillsDirectory, + layout: 'multi-skill-repo', + lastExportAt: null, + lastImportAt: null + } + + fs.mkdirSync(path.join(skillsDirectory, 'skills'), { recursive: true }) + const state = this.getStoredManagementState() + state.sync = { + ...state.sync, + ...config + } + this.saveManagementState(state) + publishDeepchatEvent('skills.catalog.changed', { + reason: 'sync-directory-updated', + version: Date.now() + }) + return state.sync + } + + async previewSyncDirectoryExport( + input: SkillSyncDirectoryExportInput + ): Promise { + const config = this.requireSyncDirectoryConfig() + const selected = new Set(input.skillNames) + const skills = (await this.getUnifiedSkillCatalog()).filter((skill) => { + if (!selected.has(skill.name)) return false + return input.includeDisabled === true || !skill.deepchatDisabled + }) + + return { + skillsDirectory: config.skillsDirectory, + items: skills.map((skill) => { + const targetPath = path.join(config.skillsDirectory, 'skills', skill.name) + if (!skill.mutable || !fs.existsSync(path.join(skill.skillRoot, 'SKILL.md'))) { + return { + name: skill.name, + state: 'invalid', + sourcePath: skill.skillRoot, + targetPath, + error: 'Skill cannot be exported' + } + } + return { + name: skill.name, + state: this.resolveExportPreviewState(skill.skillRoot, targetPath), + sourcePath: skill.skillRoot, + targetPath + } + }) + } + } + + async executeSyncDirectoryExport( + input: SkillSyncDirectoryExportInput + ): Promise { + const preview = await this.previewSyncDirectoryExport(input) + let exported = 0 + let skipped = 0 + const failed: Array<{ skillName: string; reason: string }> = [] + + fs.mkdirSync(path.join(preview.skillsDirectory, 'skills'), { recursive: true }) + this.ensureSyncDirectoryReadme(preview.skillsDirectory) + + for (const item 
of preview.items) { + if (item.state === 'invalid') { + skipped += 1 + failed.push({ skillName: item.name, reason: item.error ?? 'Invalid skill' }) + continue + } + + try { + fs.rmSync(item.targetPath, { recursive: true, force: true }) + this.copyDirectory(item.sourcePath, item.targetPath) + exported += 1 + } catch (error) { + failed.push({ + skillName: item.name, + reason: error instanceof Error ? error.message : String(error) + }) + } + } + + if (exported > 0) { + this.updateSyncDirectoryConfig({ lastExportAt: new Date().toISOString() }) + } + + return { + success: failed.length === 0, + exported, + skipped, + failed + } + } + + async previewSyncDirectoryImport(): Promise { + const config = this.requireSyncDirectoryConfig() + const skillsRoot = path.join(config.skillsDirectory, 'skills') + const items: SkillSyncDirectoryPreviewItem[] = [] + if (!fs.existsSync(skillsRoot)) { + return { skillsDirectory: config.skillsDirectory, items } + } + + for (const entry of fs.readdirSync(skillsRoot, { withFileTypes: true })) { + if (!entry.isDirectory()) continue + const sourcePath = path.join(skillsRoot, entry.name) + const targetPath = path.join(this.skillsDir, entry.name) + items.push(this.createImportPreviewItem(sourcePath, targetPath)) + } + + return { + skillsDirectory: config.skillsDirectory, + items: items.sort((left, right) => left.name.localeCompare(right.name)) + } + } + + async executeSyncDirectoryImport( + input: SkillSyncDirectoryImportInput + ): Promise { + const preview = await this.previewSyncDirectoryImport() + const selected = new Set(input.skillNames) + const strategy = input.strategy ?? 'rename' + let imported = 0 + let skipped = 0 + const failed: Array<{ skillName: string; reason: string }> = [] + + for (const item of preview.items.filter((candidate) => selected.has(candidate.name))) { + if (item.state === 'invalid' || item.state === 'same') { + skipped += 1 + if (item.state === 'invalid') { + failed.push({ skillName: item.name, reason: item.error ?? 
'Invalid skill' }) + } + continue + } + + if ((item.state === 'conflict' || item.state === 'modified') && strategy === 'skip') { + skipped += 1 + continue + } + + const targetName = + (item.state === 'conflict' || item.state === 'modified') && strategy === 'rename' + ? this.createUniqueSkillName(item.name) + : item.name + const result = await this.installFromDirectory( + item.sourcePath, + { overwrite: strategy === 'overwrite' }, + 'imported', + { + importedFrom: item.sourcePath, + importedAt: new Date().toISOString() + }, + targetName + ) + if (result.success) { + imported += 1 + } else { + failed.push({ + skillName: item.name, + reason: result.error ?? 'Import failed' + }) + } + } + + if (imported > 0) { + this.updateSyncDirectoryConfig({ lastImportAt: new Date().toISOString() }) + } + + return { + success: failed.length === 0, + imported, + skipped, + failed + } + } + async registerPluginSkill(input: { ownerPluginId: string id: string @@ -1212,6 +1678,90 @@ export class SkillPresenter implements ISkillPresenter { } } + async registerAdoptedSkill(input: SkillAdoptionRegistration): Promise { + const skillRoot = path.resolve(input.canonicalPath) + const metadata = await this.parseSkillMetadata(path.join(skillRoot, 'SKILL.md'), input.name) + if (!metadata || metadata.name !== input.name) { + throw new Error(`Adopted skill "${input.name}" is invalid`) + } + + this.metadataCache.set(input.name, metadata) + this.contentCache.delete(input.name) + this.updateSkillManagementItem(input.name, (item) => ({ + ...item, + canonicalPath: skillRoot, + source: { + type: 'adopted', + agentId: input.agentId, + originalPath: input.originalPath, + adoptedAt: new Date().toISOString() + }, + agentLinks: { + ...item.agentLinks, + [input.agentId]: { + path: input.agentPath, + state: 'linked', + createdByDeepChat: true, + linkedAt: new Date().toISOString() + } + } + })) + + publishDeepchatEvent('skills.catalog.changed', { + reason: 'installed', + name: input.name, + skill: metadata, + 
version: Date.now() + }) + } + + async registerAgentSkillLink(input: SkillAgentLinkRegistration): Promise { + if (this.metadataCache.size === 0) { + await this.discoverSkills() + } + const metadata = this.metadataCache.get(input.skillName) + if (!metadata) { + throw new Error(`Skill "${input.skillName}" not found`) + } + + this.updateSkillManagementItem(input.skillName, (item) => ({ + ...item, + canonicalPath: metadata.skillRoot, + agentLinks: { + ...item.agentLinks, + [input.agentId]: { + path: input.agentPath, + state: 'linked', + createdByDeepChat: true, + linkedAt: new Date().toISOString() + } + } + })) + + publishDeepchatEvent('skills.catalog.changed', { + reason: 'management-state-updated', + name: input.skillName, + version: Date.now() + }) + } + + async removeAgentSkillLink(input: { skillName: string; agentId: string }): Promise { + this.updateSkillManagementItem(input.skillName, (item) => { + const agentLinks = { ...item.agentLinks } + delete agentLinks[input.agentId] + return { + ...item, + agentLinks: Object.keys(agentLinks).length > 0 ? 
agentLinks : undefined + } + }) + + publishDeepchatEvent('skills.catalog.changed', { + reason: 'management-state-updated', + name: input.skillName, + version: Date.now() + }) + } + async unregisterPluginSkillsByOwner(ownerPluginId: string): Promise { let changed = false for (const [key, contribution] of this.pluginSkillContributions.entries()) { @@ -1233,7 +1783,10 @@ export class SkillPresenter implements ISkillPresenter { private async installFromDirectory( folderPath: string, - options?: SkillInstallOptions + options?: SkillInstallOptions, + sourceType: SkillSourceType = 'folder-install', + sourcePatch: Partial = {}, + targetName?: string ): Promise { try { this.ensureSkillsDir() @@ -1285,15 +1838,24 @@ export class SkillPresenter implements ISkillPresenter { } } - const targetDir = path.join(this.skillsDir, skillName) + const finalSkillName = targetName?.trim() || skillName + if (!this.isSafeSkillName(finalSkillName)) { + return { + success: false, + error: 'Invalid target skill name', + errorCode: 'invalid_skill' + } + } + + const targetDir = path.join(this.skillsDir, finalSkillName) const resolvedTarget = path.resolve(targetDir) if (resolvedSource === resolvedTarget) { return { success: false, - error: `Skill "${skillName}" already exists`, + error: `Skill "${finalSkillName}" already exists`, errorCode: 'conflict', - existingSkillName: skillName + existingSkillName: finalSkillName } } @@ -1313,36 +1875,51 @@ export class SkillPresenter implements ISkillPresenter { if (!options?.overwrite) { return { success: false, - error: `Skill "${skillName}" already exists`, + error: `Skill "${finalSkillName}" already exists`, errorCode: 'conflict', - existingSkillName: skillName + existingSkillName: finalSkillName } } - const replaceResult = this.prepareExistingSkillTargetForInstall(skillName, resolvedTarget) + const replaceResult = this.prepareExistingSkillTargetForInstall( + finalSkillName, + resolvedTarget + ) if (replaceResult) { return replaceResult } - 
this.metadataCache.delete(skillName) - this.contentCache.delete(skillName) + this.metadataCache.delete(finalSkillName) + this.contentCache.delete(finalSkillName) } this.copyDirectory(resolvedSource, resolvedTarget) + if (finalSkillName !== skillName) { + this.rewriteSkillManifestName(resolvedTarget, finalSkillName) + } const metadata = await this.parseSkillMetadata( path.join(resolvedTarget, 'SKILL.md'), - skillName + finalSkillName ) if (metadata) { - this.metadataCache.set(skillName, metadata) - } + this.metadataCache.set(finalSkillName, metadata) + } + this.updateSkillManagementItem(finalSkillName, (item) => ({ + ...item, + canonicalPath: resolvedTarget, + source: { + type: sourceType, + installedAt: new Date().toISOString(), + ...sourcePatch + } + })) publishDeepchatEvent('skills.catalog.changed', { reason: 'installed', - name: skillName, + name: finalSkillName, version: Date.now() }) - return { success: true, skillName } + return { success: true, skillName: finalSkillName, targetPath: resolvedTarget } } catch (error) { const errorMsg = error instanceof Error ? 
error.message : String(error) return { success: false, error: errorMsg, errorCode: 'io_error' } @@ -1372,16 +1949,25 @@ export class SkillPresenter implements ISkillPresenter { private backupExistingSkill(skillName: string): string { const sourceDir = path.join(this.skillsDir, skillName) const timestamp = new Date().toISOString().replace(/[:.]/g, '-') - let backupDir = path.join(this.skillsDir, `${skillName}.backup-${timestamp}`) + const backupRoot = path.join(app.getPath('home'), '.deepchat', 'backups', 'skill-installs') + fs.mkdirSync(backupRoot, { recursive: true }) + let backupDir = path.join(backupRoot, `${skillName}-${timestamp}`) let counter = 0 while (fs.existsSync(backupDir)) { counter += 1 - backupDir = path.join(this.skillsDir, `${skillName}.backup-${timestamp}-${counter}`) + backupDir = path.join(backupRoot, `${skillName}-${timestamp}-${counter}`) } fs.renameSync(sourceDir, backupDir) return backupDir } + private rewriteSkillManifestName(skillDir: string, name: string): void { + const skillPath = path.join(skillDir, 'SKILL.md') + const raw = fs.readFileSync(skillPath, 'utf-8') + const parsed = matter(raw) + fs.writeFileSync(skillPath, matter.stringify(parsed.content, { ...parsed.data, name }), 'utf-8') + } + private createTargetLockedFailure( skillName: string, targetPath: string, @@ -1545,6 +2131,235 @@ export class SkillPresenter implements ISkillPresenter { } } + private async cloneGitSkillRepo(repoUrl: string): Promise { + const operationRoot = path.join(app.getPath('home'), '.deepchat', 'tmp', 'skill-installs') + fs.mkdirSync(operationRoot, { recursive: true }) + const cloneDir = path.join(operationRoot, `${Date.now()}-${randomUUID()}`) + try { + await execFileAsync('git', ['clone', '--depth', '1', repoUrl, cloneDir], { + timeout: SKILL_CONFIG.DOWNLOAD_TIMEOUT + }) + return cloneDir + } catch (error) { + fs.rmSync(cloneDir, { recursive: true, force: true }) + const errorMsg = error instanceof Error ? 
error.message : String(error) + throw new Error(`Failed to clone Git repository: ${errorMsg}`) + } + } + + private async scanGitSkillRepoDirectory( + repoUrl: string, + repoRoot: string + ): Promise { + const rootSkill = path.join(repoRoot, 'SKILL.md') + if (fs.existsSync(rootSkill)) { + return { + repoUrl, + repoFormat: 'single-skill', + skills: [this.createGitScanItem(repoRoot, 'SKILL.md')] + } + } + + const skillsRoot = path.join(repoRoot, 'skills') + const skills = fs.existsSync(skillsRoot) + ? fs + .readdirSync(skillsRoot, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .map((entry) => + this.createGitScanItem( + path.join(skillsRoot, entry.name), + path.join('skills', entry.name, 'SKILL.md') + ) + ) + : [] + + return { + repoUrl, + repoFormat: 'multi-skill', + skills: skills.sort((left, right) => left.name.localeCompare(right.name)) + } + } + + private createGitScanItem(skillDir: string, relativePath: string): GitSkillRepoScanItem { + const summary = this.readSkillManifestSummary(skillDir) + if (!summary.valid) { + return { + name: path.basename(skillDir), + description: '', + relativePath, + conflict: false, + valid: false, + error: summary.error + } + } + + return { + name: summary.name, + description: summary.description, + relativePath, + conflict: fs.existsSync(path.join(this.skillsDir, summary.name)), + valid: true + } + } + + private readSkillManifestSummary( + skillDir: string + ): { valid: true; name: string; description: string } | { valid: false; error: string } { + const skillPath = path.join(skillDir, 'SKILL.md') + if (!fs.existsSync(skillPath)) { + return { valid: false, error: 'SKILL.md not found' } + } + + try { + const content = fs.readFileSync(skillPath, 'utf-8') + const { data } = matter(content) + const name = typeof data.name === 'string' ? data.name.trim() : '' + const description = typeof data.description === 'string' ? 
data.description.trim() : '' + if (!name || !description || !this.isSafeSkillName(name)) { + return { valid: false, error: 'Invalid SKILL.md frontmatter' } + } + return { valid: true, name, description } + } catch (error) { + return { valid: false, error: error instanceof Error ? error.message : String(error) } + } + } + + private createUniqueSkillName(baseName: string): string { + let counter = 1 + let candidate = `${baseName}-${counter}` + while (fs.existsSync(path.join(this.skillsDir, candidate))) { + counter += 1 + candidate = `${baseName}-${counter}` + } + return candidate + } + + private requireSyncDirectoryConfig(): SkillSyncDirectoryConfig { + const config = this.getStoredManagementState().sync + if (!config) { + throw new Error('Skills sync directory is not configured') + } + return config + } + + private updateSyncDirectoryConfig(patch: Partial): void { + const state = this.getStoredManagementState() + if (!state.sync) { + throw new Error('Skills sync directory is not configured') + } + state.sync = { + ...state.sync, + ...patch + } + this.saveManagementState(state) + publishDeepchatEvent('skills.catalog.changed', { + reason: 'sync-directory-updated', + version: Date.now() + }) + } + + private ensureSyncDirectoryReadme(syncDirectory: string): void { + const readmePath = path.join(syncDirectory, 'README.md') + if (!fs.existsSync(readmePath)) { + fs.writeFileSync( + readmePath, + '# DeepChat Skills\n\nThis directory stores portable DeepChat skills under `skills/`.\n', + 'utf-8' + ) + } + } + + private resolveExportPreviewState( + sourcePath: string, + targetPath: string + ): SkillSyncDirectoryPreviewItem['state'] { + if (!fs.existsSync(targetPath)) { + return 'new' + } + return this.areSkillDirectoriesSame(sourcePath, targetPath) ? 
'same' : 'modified' + } + + private createImportPreviewItem( + sourcePath: string, + fallbackTargetPath: string + ): SkillSyncDirectoryPreviewItem { + const summary = this.readSkillManifestSummary(sourcePath) + if (!summary.valid) { + return { + name: path.basename(sourcePath), + state: 'invalid', + sourcePath, + targetPath: fallbackTargetPath, + error: summary.error + } + } + + const targetPath = path.join(this.skillsDir, summary.name) + if (!fs.existsSync(targetPath)) { + return { + name: summary.name, + state: 'new', + sourcePath, + targetPath + } + } + + if (this.areSkillDirectoriesSame(sourcePath, targetPath)) { + return { + name: summary.name, + state: 'same', + sourcePath, + targetPath + } + } + + const existingSource = this.getStoredManagementState().skills[summary.name]?.source + const state = + existingSource?.type === 'imported' && existingSource.importedFrom === sourcePath + ? 'modified' + : 'conflict' + return { + name: summary.name, + state, + sourcePath, + targetPath + } + } + + private areSkillDirectoriesSame(left: string, right: string): boolean { + try { + return this.createSkillDirectorySnapshot(left) === this.createSkillDirectorySnapshot(right) + } catch { + return false + } + } + + private createSkillDirectorySnapshot(root: string): string { + return this.collectSkillDirectoryFiles(root) + .sort() + .map((relativePath) => { + const content = fs.readFileSync(path.join(root, relativePath)).toString('base64') + return `${relativePath}\0${content}` + }) + .join('\0') + } + + private collectSkillDirectoryFiles(root: string, current: string = root): string[] { + const files: string[] = [] + for (const entry of fs.readdirSync(current, { withFileTypes: true })) { + if (entry.isSymbolicLink() || entry.name === SKILL_CONFIG.SIDECAR_DIR) { + continue + } + const fullPath = path.join(current, entry.name) + if (entry.isDirectory()) { + files.push(...this.collectSkillDirectoryFiles(root, fullPath)) + } else { + files.push(path.relative(root, fullPath)) + } + 
} + return files + } + /** * Uninstall a skill */ @@ -1584,9 +2399,9 @@ export class SkillPresenter implements ISkillPresenter { private cleanupUninstalledSkillState(name: string): void { if (this.isSafeSkillName(name)) { try { - this.deleteSkillExtension(name) + this.deleteSkillManagementItem(name) } catch (error) { - logger.warn('[SkillPresenter] Failed to delete skill sidecar after uninstall', { + logger.warn('[SkillPresenter] Failed to delete skill management state after uninstall', { name, error }) @@ -1642,15 +2457,17 @@ export class SkillPresenter implements ISkillPresenter { return { success: false, error: `Skill "${name}" not found` } } - const sidecarPath = this.getSidecarPath(name) const previousSkillContent = fs.readFileSync(metadata.path, 'utf-8') - const hadSidecar = fs.existsSync(sidecarPath) - const previousSidecarContent = hadSidecar ? fs.readFileSync(sidecarPath, 'utf-8') : null + const previousState = this.getStoredManagementState() const sanitized = sanitizeSkillExtensionConfig(config) try { fs.writeFileSync(metadata.path, content, 'utf-8') - fs.writeFileSync(sidecarPath, JSON.stringify(sanitized, null, 2), 'utf-8') + this.updateSkillManagementItem(name, (item) => ({ + ...item, + canonicalPath: metadata.skillRoot, + extension: sanitized + })) this.contentCache.delete(name) const newMetadata = await this.parseSkillMetadata(metadata.path, name) @@ -1664,11 +2481,7 @@ export class SkillPresenter implements ISkillPresenter { try { fs.writeFileSync(metadata.path, previousSkillContent, 'utf-8') - if (hadSidecar && previousSidecarContent !== null) { - fs.writeFileSync(sidecarPath, previousSidecarContent, 'utf-8') - } else if (fs.existsSync(sidecarPath)) { - fs.rmSync(sidecarPath, { force: true }) - } + this.saveManagementState(previousState) } catch (rollbackError) { const rollbackMessage = rollbackError instanceof Error ? 
rollbackError.message : String(rollbackError) @@ -1776,14 +2589,36 @@ export class SkillPresenter implements ISkillPresenter { async getSkillExtension(name: string): Promise { this.ensureSkillsDir() + const item = this.getStoredManagementState().skills[name] + if (item) { + return sanitizeSkillExtensionConfig(item.extension) + } + + return await this.migrateLegacySkillExtension(name) + } + + private async migrateLegacySkillExtension(name: string): Promise { const sidecarPath = this.getSidecarPath(name) if (!(await this.pathExists(sidecarPath))) { return createDefaultSkillExtensionConfig() } - try { const content = await fs.promises.readFile(sidecarPath, 'utf-8') - return sanitizeSkillExtensionConfig(JSON.parse(content)) + const config = sanitizeSkillExtensionConfig(JSON.parse(content)) + this.updateSkillManagementItem(name, (item) => ({ + ...item, + extension: config + })) + try { + fs.rmSync(sidecarPath, { force: true }) + this.removeLegacySidecarDirIfEmpty() + } catch (cleanupError) { + logger.warn('[SkillPresenter] Failed to remove migrated skill sidecar', { + name, + error: cleanupError + }) + } + return config } catch (error) { logger.warn('[SkillPresenter] Failed to read skill sidecar, using defaults', { name, @@ -1793,6 +2628,16 @@ export class SkillPresenter implements ISkillPresenter { } } + private removeLegacySidecarDirIfEmpty(): void { + try { + if (fs.existsSync(this.sidecarDir) && fs.readdirSync(this.sidecarDir).length === 0) { + fs.rmdirSync(this.sidecarDir) + } + } catch { + // Best-effort cleanup: rmdirSync only removes an empty directory (rmSync without `recursive` always throws on a directory); on any failure keep the legacy residue for the next migration attempt.
+ } + } + async saveSkillExtension(name: string, config: SkillExtensionConfig): Promise { this.ensureSkillsDir() if (this.metadataCache.size === 0) { @@ -1804,7 +2649,12 @@ export class SkillPresenter implements ISkillPresenter { } const sanitized = sanitizeSkillExtensionConfig(config) - fs.writeFileSync(this.getSidecarPath(name), JSON.stringify(sanitized, null, 2), 'utf-8') + const metadata = this.metadataCache.get(name) + this.updateSkillManagementItem(name, (item) => ({ + ...item, + canonicalPath: metadata?.skillRoot ?? item.canonicalPath, + extension: sanitized + })) this.contentCache.delete(name) } @@ -1958,12 +2808,15 @@ export class SkillPresenter implements ISkillPresenter { /** * Get allowed tools for active skills in a conversation */ - async getActiveSkillsAllowedTools(conversationId: string): Promise { + async getActiveSkillsAllowedTools( + conversationId: string, + activeSkillNamesOverride?: string[] + ): Promise { if (this.metadataCache.size === 0) { await this.discoverSkills() } - const activeSkills = await this.getActiveSkills(conversationId) + const activeSkills = activeSkillNamesOverride ?? 
(await this.getActiveSkills(conversationId)) const allowedTools: Set = new Set() for (const skillName of activeSkills) { @@ -2236,10 +3089,11 @@ export class SkillPresenter implements ISkillPresenter { return path.join(this.sidecarDir, `${name}.json`) } - private deleteSkillExtension(name: string): void { - const sidecarPath = this.getSidecarPath(name) - if (fs.existsSync(sidecarPath)) { - fs.rmSync(sidecarPath, { force: true }) + private deleteSkillManagementItem(name: string): void { + const state = this.getStoredManagementState() + if (state.skills[name]) { + delete state.skills[name] + this.saveManagementState(state) } } diff --git a/src/main/presenter/skillPresenter/skillExecutionService.ts b/src/main/presenter/skillPresenter/skillExecutionService.ts index d3d415b07..f9e12bcd0 100644 --- a/src/main/presenter/skillPresenter/skillExecutionService.ts +++ b/src/main/presenter/skillPresenter/skillExecutionService.ts @@ -41,6 +41,7 @@ export interface SkillRunRequest { export interface SkillRunOptions { conversationId: string + activeSkillNames?: string[] } interface SkillExecutionServiceOptions { @@ -87,7 +88,7 @@ export class SkillExecutionService { async execute(input: SkillRunRequest, options: SkillRunOptions): Promise { const plan = await this.preparePlanForExecution( - await this.buildSpawnPlan(input, options.conversationId) + await this.buildSpawnPlan(input, options.conversationId, options.activeSkillNames) ) const timeoutMs = input.timeoutMs ?? DEFAULT_TIMEOUT_MS @@ -125,10 +126,15 @@ export class SkillExecutionService { } } - private async buildSpawnPlan(input: SkillRunRequest, conversationId: string): Promise { - const activeSkills = await this.skillPresenter.getActiveSkills(conversationId) + private async buildSpawnPlan( + input: SkillRunRequest, + conversationId: string, + activeSkillNames?: string[] + ): Promise { + const activeSkills = + activeSkillNames ?? 
(await this.skillPresenter.getActiveSkills(conversationId)) if (!activeSkills.includes(input.skill)) { - throw new Error(`Skill "${input.skill}" is not pinned in this conversation`) + throw new Error(`Skill "${input.skill}" is not active in the current message/tool loop`) } const metadata = (await this.skillPresenter.getMetadataList()).find( diff --git a/src/main/presenter/skillSyncPresenter/index.ts b/src/main/presenter/skillSyncPresenter/index.ts index 292aa2e48..05ba17e12 100644 --- a/src/main/presenter/skillSyncPresenter/index.ts +++ b/src/main/presenter/skillSyncPresenter/index.ts @@ -11,7 +11,9 @@ import logger from '@shared/logger' import * as fs from 'fs' import * as path from 'path' +import { randomUUID } from 'node:crypto' import { app } from 'electron' +import matter from 'gray-matter' import type { ISkillSyncPresenter, ExternalToolConfig, @@ -22,17 +24,38 @@ import type { CanonicalSkill, ExternalSkillInfo, ScanCache, - NewDiscovery + NewDiscovery, + InstalledSkillAgent, + InstalledSkillAgentDetail, + AgentSkillItem, + AdoptAgentSkillInput, + AdoptAgentSkillPreview, + AdoptAgentSkillResult, + AgentSkillLinkInput, + LinkDeepChatSkillResult, + LinkDeepChatSkillsInput, + LinkDeepChatSkillsPreview, + LinkDeepChatSkillsResult, + SkillDetail } from '@shared/types/skillSync' import { ConflictStrategy } from '@shared/types/skillSync' +import type { UnifiedSkillItem } from '@shared/types/skillManagement' import type { ISkillPresenter, IConfigPresenter } from '@shared/presenter' import { toolScanner, resolveSkillsDir } from './toolScanner' import { formatConverter } from './formatConverter' import type { SyncContext } from './types' import { publishDeepchatEvent } from '@/routes/publishDeepchatEvent' -import { isValidToolId, isValidConflictStrategy, checkWritePermission } from './security' +import { + isValidToolId, + isValidConflictStrategy, + checkWritePermission, + checkReadPermission, + isFilenameSafe +} from './security' import { 
scanAndDetectDiscoveriesInWorker, scanExternalToolsInWorker } from './scanWorker' +const SKILL_NAME_PATTERN = /^[a-z0-9][a-z0-9._-]*$/ + type SkillSyncEventName = | 'skillSync.discoveries.changed' | 'skillSync.scan.started' @@ -734,6 +757,354 @@ export class SkillSyncPresenter implements ISkillSyncPresenter { return toolScanner.getAllTools() } + async scanSkillAgents(): Promise { + const results = await this.scanExternalToolsWithFallback() + const resultByTool = new Map(results.map((result) => [result.toolId, result])) + const agents: InstalledSkillAgent[] = [] + + for (const tool of this.getManageableAgentTools()) { + const result = + resultByTool.get(tool.id) ?? + (await toolScanner.scanTool(tool.id, this.syncContext.projectRoot)) + const detail = await this.buildAgentDetail(tool, result) + const { skills: _skills, ...summary } = detail + agents.push(summary) + } + + return agents + } + + async scanSkillAgent(input: { agentId: string }): Promise { + const tool = toolScanner.getTool(input.agentId) + if (!tool || !this.canManageAgentLinks(tool)) { + return { + id: input.agentId, + name: input.agentId, + skillsDir: '', + isCustom: false, + supportsLinkManagement: false, + skillsCount: 0, + linkedCount: 0, + agentOwnedCount: 0, + conflictCount: 0, + brokenLinkCount: 0, + status: 'detected-no-skills-dir', + skills: [] + } + } + + return this.buildAgentDetail( + tool, + await toolScanner.scanTool(tool.id, this.syncContext.projectRoot) + ) + } + + async getAgentSkillDetail(input: { agentId: string; skillName: string }): Promise { + const detail = await this.scanSkillAgent({ agentId: input.agentId }) + const skill = detail.skills.find((item) => item.name === input.skillName) + if (!skill) { + throw new Error(`Skill "${input.skillName}" not found in ${detail.name}`) + } + + const markdownPath = path.join(skill.path, 'SKILL.md') + const markdown = await fs.promises.readFile(markdownPath, 'utf-8') + return { + name: skill.name, + description: skill.description ?? 
'', + sourcePath: markdownPath, + markdown, + mutable: skill.owner !== 'broken-link' + } + } + + async previewAdoptAgentSkill(input: AdoptAgentSkillInput): Promise { + const adoption = await this.resolveAdoptionSource(input) + const source = await this.readAdoptableSkill(adoption.sourcePath) + if (source.name !== adoption.skill.name) { + throw new Error(`SKILL.md name "${source.name}" does not match "${adoption.skill.name}"`) + } + + const skillsDir = path.resolve(await this.skillPresenter.getSkillsDir()) + const deepchatSkills = await this.skillPresenter.getUnifiedSkillCatalog() + const deepchatNames = new Set(deepchatSkills.map((skill) => skill.name)) + const hasConflict = + deepchatNames.has(source.name) || (await this.pathExists(path.join(skillsDir, source.name))) + const targetName = + input.targetName ?? + (hasConflict + ? await this.generateAdoptionTargetName( + `${source.name}-${input.agentId}`, + skillsDir, + deepchatNames + ) + : source.name) + + this.assertValidDeepChatSkillName(targetName) + if ( + deepchatNames.has(targetName) || + (await this.pathExists(path.join(skillsDir, targetName))) + ) { + throw new Error(`Skill "${targetName}" already exists`) + } + + const dataRoot = path.dirname(skillsDir) + const targetPath = path.join(skillsDir, targetName) + + return { + agentId: input.agentId, + agentName: adoption.agent.name, + skillName: adoption.skill.name, + targetName, + sourcePath: adoption.sourcePath, + agentPath: adoption.agentPath, + targetPath, + backupRoot: path.join( + dataRoot, + 'backups', + 'skill-adoptions', + input.agentId, + adoption.skill.name + ), + conflict: hasConflict, + warnings: targetName === source.name ? 
[] : [`Skill will be adopted as "${targetName}"`] + } + } + + async executeAdoptAgentSkill(input: AdoptAgentSkillInput): Promise { + let tempPath = '' + let targetCreated = false + let originalMoved = false + let preview: AdoptAgentSkillPreview | undefined + let backupPath = '' + + try { + preview = await this.previewAdoptAgentSkill(input) + const operationId = `${Date.now()}-${randomUUID()}` + const dataRoot = path.dirname(path.resolve(await this.skillPresenter.getSkillsDir())) + tempPath = path.join(dataRoot, 'tmp', 'skill-adoptions', operationId) + backupPath = path.join(preview.backupRoot, operationId) + + await fs.promises.mkdir(path.dirname(tempPath), { recursive: true }) + await fs.promises.mkdir(path.dirname(backupPath), { recursive: true }) + await this.prepareAdoptionTemp(preview.sourcePath, tempPath, preview.targetName) + + if (await this.pathExists(preview.targetPath)) { + throw new Error(`Skill "${preview.targetName}" already exists`) + } + + await fs.promises.mkdir(path.dirname(preview.targetPath), { recursive: true }) + await fs.promises.rename(tempPath, preview.targetPath) + targetCreated = true + + try { + await fs.promises.rename(preview.agentPath, backupPath) + originalMoved = true + await this.createDirectoryLink(preview.targetPath, preview.agentPath) + } catch (error) { + if (originalMoved && !(await this.pathExists(preview.agentPath))) { + await fs.promises.rename(backupPath, preview.agentPath).catch(() => undefined) + } + if (targetCreated) { + await fs.promises.rm(preview.targetPath, { recursive: true, force: true }) + } + throw error + } + + await this.skillPresenter.registerAdoptedSkill({ + name: preview.targetName, + canonicalPath: preview.targetPath, + agentId: preview.agentId, + agentPath: preview.agentPath, + originalPath: preview.sourcePath + }) + + return { + success: true, + skillName: preview.targetName, + targetPath: preview.targetPath, + agentPath: preview.agentPath, + backupPath + } + } catch (error) { + if (tempPath) { + await 
fs.promises.rm(tempPath, { recursive: true, force: true }).catch(() => undefined) + } + return { + success: false, + skillName: preview?.targetName, + targetPath: preview?.targetPath, + agentPath: preview?.agentPath, + backupPath: backupPath || undefined, + error: error instanceof Error ? error.message : String(error) + } + } + } + + async previewLinkDeepChatSkills( + input: LinkDeepChatSkillsInput + ): Promise { + const tool = this.resolveManageableAgentTool(input.agentId) + const detail = await this.scanSkillAgent({ agentId: input.agentId }) + const skillsDir = detail.skillsDir || resolveSkillsDir(tool, this.syncContext.projectRoot) + const existingByName = new Map(detail.skills.map((skill) => [skill.name, skill])) + const deepchatByName = new Map( + (await this.skillPresenter.getUnifiedSkillCatalog()).map((skill) => [skill.name, skill]) + ) + + return { + agentId: input.agentId, + agentName: tool.name, + skillsDir, + items: await Promise.all( + [...new Set(input.skillNames)].map(async (skillName) => { + this.assertValidDeepChatSkillName(skillName) + const deepchat = deepchatByName.get(skillName) + const targetPath = path.join(skillsDir, skillName) + if (!deepchat) { + return { + skillName, + targetPath, + status: 'missing', + message: `Skill "${skillName}" not found in DeepChat` + } + } + + const existing = existingByName.get(skillName) + if (!existing) { + return { + skillName, + sourcePath: deepchat.skillRoot, + targetPath, + status: 'ready' + } + } + + if ( + existing.status === 'linked' && + existing.link?.targetPath && + path.resolve(existing.link.targetPath) === path.resolve(deepchat.skillRoot) + ) { + return { + skillName, + sourcePath: deepchat.skillRoot, + targetPath, + status: 'already-linked' + } + } + + return { + skillName, + sourcePath: deepchat.skillRoot, + targetPath, + status: 'conflict', + message: `Agent path already exists: ${targetPath}` + } + }) + ) + } + } + + async executeLinkDeepChatSkills( + input: LinkDeepChatSkillsInput + ): Promise { 
+ const preview = await this.previewLinkDeepChatSkills(input) + const result: LinkDeepChatSkillsResult = { + success: true, + linked: 0, + skipped: 0, + failed: [] + } + + await fs.promises.mkdir(preview.skillsDir, { recursive: true }) + if (!(await checkWritePermission(preview.skillsDir))) { + return { + success: false, + linked: 0, + skipped: 0, + failed: input.skillNames.map((skillName) => ({ + skillName, + reason: `No write permission for: ${preview.skillsDir}` + })) + } + } + + for (const item of preview.items) { + if (item.status === 'already-linked') { + result.skipped += 1 + continue + } + if (item.status !== 'ready' || !item.sourcePath) { + result.skipped += 1 + continue + } + + try { + await this.createDirectoryLink(item.sourcePath, item.targetPath) + await this.skillPresenter.registerAgentSkillLink({ + skillName: item.skillName, + agentId: input.agentId, + agentPath: item.targetPath + }) + result.linked += 1 + } catch (error) { + result.failed.push({ + skillName: item.skillName, + reason: error instanceof Error ? error.message : String(error) + }) + } + } + + result.success = result.failed.length === 0 + return result + } + + async repairAgentSkillLink(input: AgentSkillLinkInput): Promise { + try { + const link = await this.resolveDeepChatOwnedAgentLink(input) + await this.assertAgentPathIsLinkOrMissing(link.agentPath) + await fs.promises.rm(link.agentPath, { recursive: true, force: true }) + await this.createDirectoryLink(link.targetPath, link.agentPath) + await this.skillPresenter.registerAgentSkillLink({ + skillName: input.skillName, + agentId: input.agentId, + agentPath: link.agentPath + }) + return { + success: true, + skillName: input.skillName, + agentPath: link.agentPath, + targetPath: link.targetPath + } + } catch (error) { + return { + success: false, + skillName: input.skillName, + error: error instanceof Error ? 
error.message : String(error) + } + } + } + + async removeAgentSkillLink(input: AgentSkillLinkInput): Promise { + try { + const link = await this.resolveDeepChatOwnedAgentLink(input) + await this.assertAgentPathIsLinkOrMissing(link.agentPath) + await fs.promises.rm(link.agentPath, { recursive: true, force: true }) + await this.skillPresenter.removeAgentSkillLink(input) + return { + success: true, + skillName: input.skillName, + agentPath: link.agentPath, + targetPath: link.targetPath + } + } catch (error) { + return { + success: false, + skillName: input.skillName, + error: error instanceof Error ? error.message : String(error) + } + } + } + /** * Check if a tool's directory exists */ @@ -753,6 +1124,440 @@ export class SkillSyncPresenter implements ISkillSyncPresenter { // Private Helper Methods // ============================================================================ + private async resolveAdoptionSource(input: AdoptAgentSkillInput): Promise<{ + agent: InstalledSkillAgentDetail + skill: AgentSkillItem + sourcePath: string + agentPath: string + }> { + const tool = toolScanner.getTool(input.agentId) + if (!tool || !this.canManageAgentLinks(tool)) { + throw new Error(`Agent "${input.agentId}" does not support skill adoption`) + } + + const agent = await this.scanSkillAgent({ agentId: input.agentId }) + const skill = agent.skills.find((item) => item.name === input.skillName) + if (!skill) { + throw new Error(`Skill "${input.skillName}" not found in ${agent.name}`) + } + if (!['agent-owned', 'linked-out', 'conflict'].includes(skill.status)) { + throw new Error(`Skill "${input.skillName}" cannot be adopted from status "${skill.status}"`) + } + if (!this.isInsideDirectory(skill.path, agent.skillsDir)) { + throw new Error(`Agent path escapes skills directory: ${skill.path}`) + } + + const sourcePath = skill.status === 'linked-out' ? 
skill.link?.targetPath : skill.path + if (!sourcePath) { + throw new Error(`Skill "${input.skillName}" source path is unavailable`) + } + if (!(await checkReadPermission(sourcePath))) { + throw new Error(`No read permission for: ${sourcePath}`) + } + + return { + agent, + skill, + sourcePath, + agentPath: skill.path + } + } + + private resolveManageableAgentTool(agentId: string): ExternalToolConfig { + const tool = toolScanner.getTool(agentId) + if (!tool || !this.canManageAgentLinks(tool)) { + throw new Error(`Agent "${agentId}" does not support skill links`) + } + return tool + } + + private async resolveDeepChatOwnedAgentLink(input: AgentSkillLinkInput): Promise<{ + agentPath: string + targetPath: string + }> { + this.assertValidDeepChatSkillName(input.skillName) + const tool = this.resolveManageableAgentTool(input.agentId) + const skillsDir = resolveSkillsDir(tool, this.syncContext.projectRoot) + const state = await this.skillPresenter.getSkillManagementState() + const link = state.skills[input.skillName]?.agentLinks?.[input.agentId] + if (!link?.createdByDeepChat) { + throw new Error(`Link for "${input.skillName}" was not created by DeepChat`) + } + + const deepchat = (await this.skillPresenter.getUnifiedSkillCatalog()).find( + (skill) => skill.name === input.skillName + ) + if (!deepchat || !(await this.pathExists(deepchat.skillRoot))) { + throw new Error(`DeepChat skill "${input.skillName}" not found`) + } + + if (!this.isInsideDirectory(link.path, skillsDir)) { + throw new Error(`Agent link path escapes skills directory: ${link.path}`) + } + + return { + agentPath: link.path, + targetPath: deepchat.skillRoot + } + } + + private async assertAgentPathIsLinkOrMissing(agentPath: string): Promise { + try { + await fs.promises.readlink(agentPath) + return + } catch { + if (await this.pathExists(agentPath)) { + throw new Error(`Agent path is not a link: ${agentPath}`) + } + } + } + + private async readAdoptableSkill(skillRoot: string): Promise<{ + name: string + 
description: string + parsed: matter.GrayMatterFile + }> { + const skillPath = path.join(skillRoot, 'SKILL.md') + const content = await fs.promises.readFile(skillPath, 'utf-8') + const parsed = matter(content) + const name = typeof parsed.data.name === 'string' ? parsed.data.name.trim() : '' + const description = + typeof parsed.data.description === 'string' ? parsed.data.description.trim() : '' + this.assertValidDeepChatSkillName(name) + if (!description) { + throw new Error('Skill description not found in SKILL.md frontmatter') + } + return { name, description, parsed } + } + + private assertValidDeepChatSkillName(name: string): void { + if (!SKILL_NAME_PATTERN.test(name) || name.includes('/') || name.includes('\\')) { + throw new Error(`Invalid skill name: ${name}`) + } + } + + private async generateAdoptionTargetName( + baseName: string, + skillsDir: string, + existingNames: Set + ): Promise { + this.assertValidDeepChatSkillName(baseName) + let candidate = baseName + let counter = 2 + while ( + existingNames.has(candidate) || + (await this.pathExists(path.join(skillsDir, candidate))) + ) { + candidate = `${baseName}-${counter}` + counter += 1 + } + return candidate + } + + private async prepareAdoptionTemp( + sourcePath: string, + tempPath: string, + targetName: string + ): Promise { + await fs.promises.rm(tempPath, { recursive: true, force: true }) + await this.copyDirectoryWithoutSymlinks(sourcePath, tempPath) + const copied = await this.readAdoptableSkill(tempPath) + if (copied.name !== targetName) { + copied.parsed.data.name = targetName + await fs.promises.writeFile( + path.join(tempPath, 'SKILL.md'), + matter.stringify(copied.parsed.content, copied.parsed.data), + 'utf-8' + ) + } + } + + private async copyDirectoryWithoutSymlinks( + sourcePath: string, + targetPath: string + ): Promise { + await fs.promises.mkdir(targetPath, { recursive: true }) + const entries = await fs.promises.readdir(sourcePath, { withFileTypes: true }) + for (const entry of entries) 
{ + if (entry.isSymbolicLink() || entry.name === '.deepchat-meta') { + continue + } + const sourceEntry = path.join(sourcePath, entry.name) + const targetEntry = path.join(targetPath, entry.name) + if (entry.isDirectory()) { + await this.copyDirectoryWithoutSymlinks(sourceEntry, targetEntry) + } else if (entry.isFile()) { + await fs.promises.copyFile(sourceEntry, targetEntry) + } + } + } + + private async createDirectoryLink(targetPath: string, linkPath: string): Promise { + await fs.promises.symlink( + targetPath, + linkPath, + process.platform === 'win32' ? 'junction' : 'dir' + ) + } + + private getManageableAgentTools(): ExternalToolConfig[] { + return toolScanner.getAllTools().filter((tool) => this.canManageAgentLinks(tool)) + } + + private canManageAgentLinks(tool: ExternalToolConfig): boolean { + return ( + !tool.isProjectLevel && + tool.filePattern === '*/SKILL.md' && + tool.capabilities.supportsSubfolders + ) + } + + private async buildAgentDetail( + tool: ExternalToolConfig, + result: ScanResult + ): Promise { + if (!result.available) { + return this.createAgentDetail( + tool, + result.skillsDir || tool.skillsDir, + 'detected-no-skills-dir', + [] + ) + } + + const skills = await this.classifyAgentSkills(result) + const status = skills.some((skill) => skill.status === 'empty') ? 
'permission-denied' : 'ready' + return this.createAgentDetail( + tool, + result.skillsDir, + status, + skills.filter((skill) => skill.status !== 'empty') + ) + } + + private createAgentDetail( + tool: ExternalToolConfig, + skillsDir: string, + status: InstalledSkillAgent['status'], + skills: AgentSkillItem[] + ): InstalledSkillAgentDetail { + return { + id: tool.id, + name: tool.name, + skillsDir, + isCustom: false, + supportsLinkManagement: this.canManageAgentLinks(tool), + skillsCount: skills.length, + linkedCount: skills.filter((skill) => skill.status === 'linked').length, + agentOwnedCount: skills.filter((skill) => skill.status === 'agent-owned').length, + conflictCount: skills.filter((skill) => skill.status === 'conflict').length, + brokenLinkCount: skills.filter((skill) => skill.status === 'broken-link').length, + status, + skills + } + } + + private async classifyAgentSkills(result: ScanResult): Promise { + const deepchatSkills = await this.skillPresenter.getUnifiedSkillCatalog() + const deepchatByName = new Map(deepchatSkills.map((skill) => [skill.name, skill])) + const deepchatSkillsDir = path.resolve(await this.skillPresenter.getSkillsDir()) + const scannedByPath = new Map(result.skills.map((skill) => [path.resolve(skill.path), skill])) + + let entries: fs.Dirent[] + try { + entries = await fs.promises.readdir(result.skillsDir, { withFileTypes: true }) + } catch (error) { + const code = typeof error === 'object' && error ? 
(error as { code?: unknown }).code : null + if (code === 'EACCES' || code === 'EPERM') { + return [ + { + name: result.toolId, + path: result.skillsDir, + owner: 'unknown', + status: 'empty' + } + ] + } + return [] + } + + const skills: AgentSkillItem[] = [] + for (const entry of entries) { + if (!isFilenameSafe(entry.name) || (!entry.isDirectory() && !entry.isSymbolicLink())) { + continue + } + + const entryPath = path.join(result.skillsDir, entry.name) + if (entry.isSymbolicLink()) { + skills.push( + await this.classifyAgentSkillLink( + result.toolId, + entry.name, + entryPath, + deepchatSkillsDir, + deepchatByName + ) + ) + continue + } + + const scanInfo = scannedByPath.get(path.resolve(entryPath)) + if (!scanInfo) { + continue + } + skills.push(await this.classifyAgentSkillDirectory(scanInfo, deepchatByName)) + } + + return skills.sort((left, right) => left.name.localeCompare(right.name)) + } + + private async classifyAgentSkillDirectory( + skill: ExternalSkillInfo, + deepchatByName: Map + ): Promise { + const deepchat = deepchatByName.get(skill.name) + if (!deepchat) { + return { + name: skill.name, + description: skill.description, + path: skill.path, + owner: 'agent', + status: 'agent-owned', + action: 'adopt', + deepchat: { exists: false } + } + } + + const sameContent = await this.hasSameSkillContent(skill.path, deepchat.skillRoot) + return { + name: skill.name, + description: skill.description || deepchat.description, + path: skill.path, + owner: 'agent', + status: sameContent ? 'agent-owned' : 'conflict', + action: sameContent ? 'adopt' : 'resolve-conflict', + deepchat: { + exists: true, + path: deepchat.skillRoot, + disabled: deepchat.deepchatDisabled, + sameContent + } + } + } + + private async classifyAgentSkillLink( + agentId: string, + name: string, + linkPath: string, + deepchatSkillsDir: string, + deepchatByName: Map + ): Promise { + const targetPath = await this.readResolvedLinkTarget(linkPath) + const targetExists = targetPath ? 
await this.pathExists(targetPath) : false + const targetInsideDeepChat = Boolean( + targetPath && this.isInsideDirectory(targetPath, deepchatSkillsDir) + ) + const deepchat = deepchatByName.get(name) + const createdByDeepChat = + deepchat?.agentLinks[agentId]?.createdByDeepChat === true && + path.resolve(deepchat.agentLinks[agentId].path) === path.resolve(linkPath) + + if (!targetExists) { + return { + name, + path: linkPath, + owner: 'broken-link', + status: 'broken-link', + action: createdByDeepChat ? 'repair-link' : undefined, + link: { + isSymlink: true, + targetPath, + targetExists: false, + targetInsideDeepChat, + createdByDeepChat + }, + deepchat: deepchat + ? { exists: true, path: deepchat.skillRoot, disabled: deepchat.deepchatDisabled } + : { exists: false } + } + } + + if (targetInsideDeepChat) { + return { + name, + description: deepchat?.description, + path: linkPath, + owner: 'deepchat', + status: 'linked', + action: createdByDeepChat ? 'remove-link' : undefined, + link: { + isSymlink: true, + targetPath, + targetExists: true, + targetInsideDeepChat: true, + createdByDeepChat + }, + deepchat: deepchat + ? { exists: true, path: deepchat.skillRoot, disabled: deepchat.deepchatDisabled } + : { exists: false } + } + } + + return { + name, + path: linkPath, + owner: 'external-link', + status: 'linked-out', + action: 'adopt', + link: { + isSymlink: true, + targetPath, + targetExists: true, + targetInsideDeepChat: false + }, + deepchat: deepchat + ? { exists: true, path: deepchat.skillRoot, disabled: deepchat.deepchatDisabled } + : { exists: false } + } + } + + private async readResolvedLinkTarget(linkPath: string): Promise { + try { + const rawTarget = await fs.promises.readlink(linkPath) + return path.isAbsolute(rawTarget) + ? 
path.resolve(rawTarget) + : path.resolve(path.dirname(linkPath), rawTarget) + } catch { + return undefined + } + } + + private async pathExists(targetPath: string): Promise { + try { + await fs.promises.access(targetPath, fs.constants.F_OK) + return true + } catch { + return false + } + } + + private isInsideDirectory(targetPath: string, parentPath: string): boolean { + const relative = path.relative(parentPath, path.resolve(targetPath)) + return relative === '' || (!relative.startsWith('..') && !path.isAbsolute(relative)) + } + + private async hasSameSkillContent(leftRoot: string, rightRoot: string): Promise { + try { + const [left, right] = await Promise.all([ + fs.promises.readFile(path.join(leftRoot, 'SKILL.md'), 'utf-8'), + fs.promises.readFile(path.join(rightRoot, 'SKILL.md'), 'utf-8') + ]) + return left === right + } catch { + return false + } + } + /** * Parse an external skill file */ diff --git a/src/main/presenter/sqlitePresenter/tables/agentMemory.ts b/src/main/presenter/sqlitePresenter/tables/agentMemory.ts index 3919ec179..bde8464d9 100644 --- a/src/main/presenter/sqlitePresenter/tables/agentMemory.ts +++ b/src/main/presenter/sqlitePresenter/tables/agentMemory.ts @@ -805,6 +805,46 @@ export class AgentMemoryTable extends BaseTable { return row?.at ?? null } + getCurrentEmbeddingDimension(agentId: string, fingerprint: string): number | null { + const row = this.db + .prepare( + `SELECT embedding_dim AS dim + FROM agent_memory + WHERE agent_id = ? + AND superseded_by IS NULL + AND status = 'embedded' + AND kind NOT IN ('persona', 'working') + AND embedding_model = ? + AND embedding_dim IS NOT NULL + AND embedding_dim > 0 + ORDER BY created_at DESC, rowid DESC + LIMIT 1` + ) + .get(agentId, fingerprint) as { dim: number | null } | undefined + return row?.dim ?? null + } + + hasStaleEmbeddings(agentId: string, currentDim: number, fingerprint: string): boolean { + const row = this.db + .prepare( + `SELECT 1 AS stale + FROM agent_memory + WHERE agent_id = ? 
+ AND superseded_by IS NULL + AND status = 'embedded' + AND kind NOT IN ('persona', 'working') + AND ( + embedding_dim IS NULL OR + embedding_dim != ? OR + embedding_model IS NULL OR + embedding_model != ? + ) + LIMIT 1` + ) + .get(agentId, currentDim, fingerprint) as { stale: number } | undefined + return row !== undefined + } + // Soft delete: archived rows stay on disk (and in the vector store) but drop out of recall. archive(id: string, _at: number = Date.now()): void { this.db.prepare("UPDATE agent_memory SET status = 'archived' WHERE id = ?").run(id) diff --git a/src/main/presenter/sqlitePresenter/tables/configTables.ts b/src/main/presenter/sqlitePresenter/tables/configTables.ts index 3b5ece24d..142e3990c 100644 --- a/src/main/presenter/sqlitePresenter/tables/configTables.ts +++ b/src/main/presenter/sqlitePresenter/tables/configTables.ts @@ -368,9 +368,9 @@ export class ConfigTables extends BaseTable { } hasModelStatus(statusKey: string): boolean { - const row = this.db.prepare('SELECT 1 FROM model_status WHERE status_key = ?').get(statusKey) as - | { 1: number } - | undefined + const row = this.db + .prepare('SELECT 1 FROM model_status WHERE status_key = ?') + .get(statusKey) as { 1: number } | undefined return Boolean(row) } diff --git a/src/main/presenter/toolPresenter/agentTools/agentImageGenerationTool.ts b/src/main/presenter/toolPresenter/agentTools/agentImageGenerationTool.ts index e247a3657..04de780d5 100644 --- a/src/main/presenter/toolPresenter/agentTools/agentImageGenerationTool.ts +++ b/src/main/presenter/toolPresenter/agentTools/agentImageGenerationTool.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import type { IConfigPresenter, MCPToolDefinition } from '@shared/presenter' import type { ToolCallImagePreview } from '@shared/types/core/mcp' import type { ImageGenerationOptions } from '@shared/imageGenerationSettings' @@ -25,40 
+25,38 @@ import type { AgentToolRuntimePort } from '../runtimePorts' export { IMAGE_GENERATE_TOOL_NAME, IMAGE_GENERATION_TOOL_SERVER_NAME } -const imageGenerateSchema = z - .object({ - prompt: z - .string() - .trim() - .min(1) - .max(8000) - .describe('Detailed text prompt for the image to generate.'), - size: z - .string() - .trim() - .refine((value) => !value || isValidOpenAIImageGenerationSize(value), { - message: 'size must be a valid WIDTHxHEIGHT image generation size' - }) - .optional() - .describe('Optional output size, such as 1024x1024, 1536x1024, or 1024x1536.'), - quality: z - .enum(IMAGE_GENERATION_QUALITY_VALUES) - .optional() - .describe('Optional quality hint when the selected image model supports it.'), - outputFormat: z - .enum(IMAGE_GENERATION_OUTPUT_FORMAT_VALUES) - .optional() - .describe('Optional output format hint when the selected image model supports it.'), - background: z - .enum(OPENAI_IMAGE_GENERATION_BACKGROUND_VALUES) - .optional() - .describe('Optional background hint when the selected image model supports it.'), - moderation: z - .enum(IMAGE_GENERATION_MODERATION_VALUES) - .optional() - .describe('Optional moderation hint when the selected image model supports it.') - }) - .strict() +const imageGenerateSchema = z.strictObject({ + prompt: z + .string() + .trim() + .min(1) + .max(8000) + .describe('Detailed text prompt for the image to generate.'), + size: z + .string() + .trim() + .refine((value) => !value || isValidOpenAIImageGenerationSize(value), { + message: 'size must be a valid WIDTHxHEIGHT image generation size' + }) + .optional() + .describe('Optional output size, such as 1024x1024, 1536x1024, or 1024x1536.'), + quality: z + .enum(IMAGE_GENERATION_QUALITY_VALUES) + .optional() + .describe('Optional quality hint when the selected image model supports it.'), + outputFormat: z + .enum(IMAGE_GENERATION_OUTPUT_FORMAT_VALUES) + .optional() + .describe('Optional output format hint when the selected image model supports it.'), + 
background: z + .enum(OPENAI_IMAGE_GENERATION_BACKGROUND_VALUES) + .optional() + .describe('Optional background hint when the selected image model supports it.'), + moderation: z + .enum(IMAGE_GENERATION_MODERATION_VALUES) + .optional() + .describe('Optional moderation hint when the selected image model supports it.') +}) type ImageGenerateInput = z.infer type ImageGenerationModelSelection = { @@ -99,7 +97,7 @@ export class AgentImageGenerationTool { name: IMAGE_GENERATE_TOOL_NAME, description: 'Generate a new image from a text prompt using the DeepChat Agent configured image generation model. Use this when the user asks to create, draw, render, or generate an image. The generated image is returned as a DeepChat image preview, not as text.', - parameters: zodToJsonSchema(imageGenerateSchema) as { + parameters: toDeepChatJsonSchema(imageGenerateSchema) as { type: string properties: Record required?: string[] diff --git a/src/main/presenter/toolPresenter/agentTools/agentMemoryTools.ts b/src/main/presenter/toolPresenter/agentTools/agentMemoryTools.ts index 71896f1c4..bfba55758 100644 --- a/src/main/presenter/toolPresenter/agentTools/agentMemoryTools.ts +++ b/src/main/presenter/toolPresenter/agentTools/agentMemoryTools.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import type { MCPToolDefinition } from '@shared/presenter' import { createAgentToolSuccessResult } from '@shared/lib/agentToolResultEnvelope' import { AGENT_MEMORY_CATEGORIES } from '@shared/types/agent-memory' @@ -15,43 +15,37 @@ export const MEMORY_TOOL_NAMES = { type MemoryToolName = (typeof MEMORY_TOOL_NAMES)[keyof typeof MEMORY_TOOL_NAMES] -const rememberSchema = z - .object({ - content: z - .string() - .trim() - .min(1) - .describe('The durable fact or event to remember long-term, written in third person.'), - kind: z - .enum(['semantic', 'episodic']) - .optional() - .default('semantic') 
- .describe('semantic = stable fact/preference; episodic = a specific event.'), - category: z - .enum(AGENT_MEMORY_CATEGORIES) - .optional() - .describe('Optional agentic memory category; when provided it takes precedence over kind.'), - importance: z - .number() - .min(0) - .max(1) - .optional() - .default(0.7) - .describe('Importance 0..1 (affects retention and recall priority).') - }) - .strict() - -const recallSchema = z - .object({ - query: z.string().trim().min(1).describe('What to recall; matched against stored memories.') - }) - .strict() - -const forgetSchema = z - .object({ - memoryId: z.string().trim().min(1).describe('The id of the memory to forget.') - }) - .strict() +const rememberSchema = z.strictObject({ + content: z + .string() + .trim() + .min(1) + .describe('The durable fact or event to remember long-term, written in third person.'), + kind: z + .enum(['semantic', 'episodic']) + .optional() + .default('semantic') + .describe('semantic = stable fact/preference; episodic = a specific event.'), + category: z + .enum(AGENT_MEMORY_CATEGORIES) + .optional() + .describe('Optional agentic memory category; when provided it takes precedence over kind.'), + importance: z + .number() + .min(0) + .max(1) + .optional() + .default(0.7) + .describe('Importance 0..1 (affects retention and recall priority).') +}) + +const recallSchema = z.strictObject({ + query: z.string().trim().min(1).describe('What to recall; matched against stored memories.') +}) + +const forgetSchema = z.strictObject({ + memoryId: z.string().trim().min(1).describe('The id of the memory to forget.') +}) const memoryToolSchemas = { [MEMORY_TOOL_NAMES.remember]: rememberSchema, @@ -69,7 +63,7 @@ function buildToolDefinition( function: { name, description, - parameters: zodToJsonSchema(schema) as { + parameters: toDeepChatJsonSchema(schema) as { type: string properties: Record required?: string[] diff --git a/src/main/presenter/toolPresenter/agentTools/agentPlanTool.ts 
b/src/main/presenter/toolPresenter/agentTools/agentPlanTool.ts index da88622f4..b225be777 100644 --- a/src/main/presenter/toolPresenter/agentTools/agentPlanTool.ts +++ b/src/main/presenter/toolPresenter/agentTools/agentPlanTool.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import type { MCPToolDefinition } from '@shared/presenter' import type { AgentToolProgressUpdate } from '@shared/types/presenters/tool.presenter' import { @@ -16,11 +16,10 @@ export const AGENT_CORE_TOOL_SERVER_NAME = 'agent-core' const MAX_PLAN_ITEMS = 12 export const updatePlanToolArgsSchema = z - .object({ + .strictObject({ explanation: z.string().optional(), plan: z.array(agentPlanItemSchema).max(MAX_PLAN_ITEMS) }) - .strict() .superRefine((value, context) => { const inProgressCount = value.plan.filter((item) => item.status === 'in_progress').length if (inProgressCount > 1) { @@ -57,7 +56,7 @@ export class AgentPlanTool { name: UPDATE_PLAN_TOOL_NAME, description: 'Update the visible progress checklist for the current multi-step task. Provide the complete current plan snapshot every time. Use short, concrete, verifiable steps. 
At most one step may be in_progress.', - parameters: zodToJsonSchema(updatePlanToolArgsSchema) as { + parameters: toDeepChatJsonSchema(updatePlanToolArgsSchema) as { type: string properties: Record required?: string[] diff --git a/src/main/presenter/toolPresenter/agentTools/agentTapeTools.ts b/src/main/presenter/toolPresenter/agentTools/agentTapeTools.ts index 7ac675666..413c9cbaa 100644 --- a/src/main/presenter/toolPresenter/agentTools/agentTapeTools.ts +++ b/src/main/presenter/toolPresenter/agentTools/agentTapeTools.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import type { MCPToolDefinition } from '@shared/presenter' import { createAgentToolSuccessResult } from '@shared/lib/agentToolResultEnvelope' import type { AgentToolRuntimePort } from '../runtimePorts' @@ -109,22 +109,20 @@ const tapeContextSchema = z.object({ .describe('Maximum evidence bytes across all returned entries. Defaults to 16384.') }) -const tapeHandoffSchema = z - .object({ - name: z - .string() - .trim() - .min(1) - .optional() - .describe('Handoff name. Values without a prefix are normalized to handoff/.'), - summary: z - .string() - .trim() - .optional() - .default('') - .describe('Compact durable summary for the handoff anchor.') - }) - .strict() +const tapeHandoffSchema = z.strictObject({ + name: z + .string() + .trim() + .min(1) + .optional() + .describe('Handoff name. 
Values without a prefix are normalized to handoff/.'), + summary: z + .string() + .trim() + .optional() + .default('') + .describe('Compact durable summary for the handoff anchor.') +}) const tapeToolSchemas = { [TAPE_TOOL_NAMES.info]: tapeInfoSchema, @@ -152,7 +150,7 @@ function buildToolDefinition( function: { name, description, - parameters: zodToJsonSchema(schema) as { + parameters: toDeepChatJsonSchema(schema) as { type: string properties: Record required?: string[] diff --git a/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts b/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts index ae33ec3a2..bafd05e41 100644 --- a/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts +++ b/src/main/presenter/toolPresenter/agentTools/agentToolManager.ts @@ -1,6 +1,6 @@ import type { IConfigPresenter, MCPToolDefinition } from '@shared/presenter' import type { AgentToolProgressUpdate } from '@shared/types/presenters/tool.presenter' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import { z } from 'zod' import fs from 'fs' import path from 'path' @@ -98,6 +98,7 @@ interface AgentToolExecutionOptions { onProgress?: (update: AgentToolProgressUpdate) => void signal?: AbortSignal allowExternalFileAccess?: boolean + activeSkillNames?: string[] } interface AgentToolPermissionCheckOptions { @@ -355,6 +356,7 @@ export class AgentToolManager { supportsVision: boolean agentWorkspacePath: string | null conversationId?: string + activeSkillNames?: string[] }): Promise { const defs: MCPToolDefinition[] = [] const isAgentMode = context.chatMode === 'agent' @@ -428,7 +430,10 @@ export class AgentToolManager { const skillDefs = this.getSkillToolDefinitions() defs.push(...skillDefs) - if (context.conversationId && (await this.hasRunnableSkillScripts(context.conversationId))) { + if ( + context.conversationId && + (await this.hasRunnableSkillScripts(context.conversationId, 
context.activeSkillNames)) + ) { defs.push(this.getSkillRunToolDefinition()) } } @@ -436,10 +441,13 @@ export class AgentToolManager { // 4. DeepChat settings tools (agent mode only, skill gated) if (isAgentMode && this.isSkillsEnabled() && context.conversationId) { try { - const activeSkills = await this.getSkillPresenter().getActiveSkills(context.conversationId) + const activeSkills = + context.activeSkillNames ?? + (await this.getSkillPresenter().getActiveSkills(context.conversationId)) if (activeSkills.includes(CHAT_SETTINGS_SKILL_NAME)) { const allowedTools = await this.getSkillPresenter().getActiveSkillsAllowedTools( - context.conversationId + context.conversationId, + activeSkills ) const requiredSettingsTools = Object.values(CHAT_SETTINGS_TOOL_NAMES) const nonOpenSettingsTools = requiredSettingsTools.filter( @@ -548,11 +556,11 @@ export class AgentToolManager { // Route to Skill tools if (this.isSkillTool(toolName)) { - return await this.callSkillTool(toolName, args, conversationId) + return await this.callSkillTool(toolName, args, conversationId, options) } if (this.isSkillExecutionTool(toolName)) { - return await this.callSkillExecutionTool(toolName, args, conversationId) + return await this.callSkillExecutionTool(toolName, args, conversationId, options) } // Route to DeepChat settings tools @@ -625,7 +633,7 @@ export class AgentToolManager { name: 'read', description: "Read the contents of a file. Supports pagination via offset/limit for large files (auto-truncated at 4500 chars if not specified). For image files, returns an English description of visible content instead of raw pixels. When invoked from a skill context with relative paths, provide base_directory as the skill's root directory.", - parameters: zodToJsonSchema(schemas.read) as { + parameters: toDeepChatJsonSchema(schemas.read) as { type: string properties: Record required?: string[] @@ -643,7 +651,7 @@ export class AgentToolManager { name: 'write', description: "Write content to a file. 
For skill files, provide base_directory as the skill's root directory.", - parameters: zodToJsonSchema(schemas.write) as { + parameters: toDeepChatJsonSchema(schemas.write) as { type: string properties: Record required?: string[] @@ -661,7 +669,7 @@ export class AgentToolManager { name: 'edit', description: 'Make precise text or line replacements in a file by matching exact text strings. Set replaceAll=false to replace only the first match.', - parameters: zodToJsonSchema(schemas.edit) as { + parameters: toDeepChatJsonSchema(schemas.edit) as { type: string properties: Record required?: string[] @@ -679,7 +687,7 @@ export class AgentToolManager { name: GLOB_TOOL_NAME, description: 'Search file paths in the workspace. Use this before content search. Returns JSON Array<{path, score}>.', - parameters: zodToJsonSchema(schemas[GLOB_TOOL_NAME]) as { + parameters: toDeepChatJsonSchema(schemas[GLOB_TOOL_NAME]) as { type: string properties: Record required?: string[] @@ -697,7 +705,7 @@ export class AgentToolManager { name: GREP_TOOL_NAME, description: 'Search file contents in the workspace. Prefer passing pathScope from glob. Use mode=regex for regular expressions. Returns JSON Array<{path, lineNumber, snippet, score}>.', - parameters: zodToJsonSchema(schemas[GREP_TOOL_NAME]) as { + parameters: toDeepChatJsonSchema(schemas[GREP_TOOL_NAME]) as { type: string properties: Record required?: string[] @@ -715,7 +723,7 @@ export class AgentToolManager { name: 'exec', description: 'Execute a shell command in the current working directory or an explicit cwd. External cwd paths are allowed in Full Access mode; default mode asks for approval. Use background: true when you know the command should detach immediately. 
Otherwise foreground exec waits briefly, and long-running commands may auto-background and return a session ID for use with the process tool.', - parameters: zodToJsonSchema(schemas.exec) as { + parameters: toDeepChatJsonSchema(schemas.exec) as { type: string properties: Record required?: string[] @@ -733,7 +741,7 @@ export class AgentToolManager { name: 'process', description: 'Manage background exec sessions created by explicit background exec calls or by long-running foreground exec calls that yielded a sessionId. Use poll to check output and status, log to get full output with pagination, write to send input to stdin, kill to terminate, and remove to clean up completed sessions.', - parameters: zodToJsonSchema(schemas.process) as { + parameters: toDeepChatJsonSchema(schemas.process) as { type: string properties: Record required?: string[] @@ -757,7 +765,7 @@ export class AgentToolManager { name: QUESTION_TOOL_NAME, description: 'Pause the agent loop and ask the user one structured clarification question when missing user preferences, implementation direction, output shape, or risk decisions would materially change the result. Do not use this for casual conversation or for facts you can discover from the repo, tools, or existing context. 
The loop resumes only after the user responds.', - parameters: zodToJsonSchema(questionToolSchema) as { + parameters: toDeepChatJsonSchema(questionToolSchema) as { type: string properties: Record required?: string[] @@ -938,7 +946,8 @@ export class AgentToolManager { const allowedDirectories = await this.buildAllowedDirectories(workspaceRoot, conversationId, { includeSkillRoots: toolName !== 'exec', includeRuntimeRoots: toolName !== 'exec', - requiredPermission: this.getRequiredFilePermission(toolName) + requiredPermission: this.getRequiredFilePermission(toolName), + activeSkillNames: options?.activeSkillNames }) if (toolName === 'exec') { @@ -1217,6 +1226,7 @@ export class AgentToolManager { includeSkillRoots?: boolean includeRuntimeRoots?: boolean requiredPermission?: FilePermissionLevel + activeSkillNames?: string[] } = {} ): Promise { const includeSkillRoots = options.includeSkillRoots !== false @@ -1236,7 +1246,10 @@ export class AgentToolManager { addPath(this.agentWorkspacePath) if (conversationId && includeSkillRoots) { - const activeSkillRoots = await this.resolveActiveSkillRoots(conversationId) + const activeSkillRoots = await this.resolveActiveSkillRoots( + conversationId, + options.activeSkillNames + ) for (const skillRoot of activeSkillRoots) { addPath(skillRoot) } @@ -1261,7 +1274,10 @@ export class AgentToolManager { return ordered } - private async resolveActiveSkillRoots(conversationId: string): Promise { + private async resolveActiveSkillRoots( + conversationId: string, + activeSkillNamesOverride?: string[] + ): Promise { const skillPresenter = this.getSkillPresenter() if (!skillPresenter?.getActiveSkills || !skillPresenter?.getMetadataList) { return [] @@ -1272,7 +1288,7 @@ export class AgentToolManager { try { ;[activeSkillNames, metadataList] = await Promise.all([ - skillPresenter.getActiveSkills(conversationId), + activeSkillNamesOverride ?? 
skillPresenter.getActiveSkills(conversationId), skillPresenter.getMetadataList() ]) } catch (error) { @@ -1823,7 +1839,7 @@ export class AgentToolManager { name: 'skill_list', description: 'List all available skills and their activation status. Skills provide specialized expertise and behavioral guidance.', - parameters: zodToJsonSchema(schemas.skill_list) as { + parameters: toDeepChatJsonSchema(schemas.skill_list) as { type: string properties: Record required?: string[] @@ -1841,7 +1857,7 @@ export class AgentToolManager { name: 'skill_view', description: 'Inspect a specific skill before relying on it. Returns the rendered SKILL.md body or a requested supporting file under the skill root.', - parameters: zodToJsonSchema(schemas.skill_view) as { + parameters: toDeepChatJsonSchema(schemas.skill_view) as { type: string properties: Record required?: string[] @@ -1859,7 +1875,7 @@ export class AgentToolManager { name: 'skill_manage', description: 'Create or edit temporary draft skills in the conversation draft area. Use the returned draftId for follow-up draft operations. This cannot modify installed skills.', - parameters: zodToJsonSchema(schemas.skill_manage) as { + parameters: toDeepChatJsonSchema(schemas.skill_manage) as { type: string properties: Record required?: string[] @@ -1880,8 +1896,8 @@ export class AgentToolManager { function: { name: 'skill_run', description: - 'Run a bundled script from a pinned skill. This is the preferred way to execute skill-local Python, Node, or shell helpers without guessing paths.', - parameters: zodToJsonSchema(this.skillSchemas.skill_run) as { + 'Run a bundled script from a skill active in the current message/tool loop. 
This is the preferred way to execute skill-local Python, Node, or shell helpers without guessing paths.', + parameters: toDeepChatJsonSchema(this.skillSchemas.skill_run) as { type: string properties: Record required?: string[] @@ -1903,9 +1919,13 @@ export class AgentToolManager { return toolName === 'skill_run' } - private async hasRunnableSkillScripts(conversationId: string): Promise { + private async hasRunnableSkillScripts( + conversationId: string, + activeSkillNames?: string[] + ): Promise { try { - const activeSkills = await this.getSkillPresenter().getActiveSkills(conversationId) + const activeSkills = + activeSkillNames ?? (await this.getSkillPresenter().getActiveSkills(conversationId)) for (const skillName of activeSkills) { const scripts = await this.getSkillPresenter().listSkillScripts(skillName) if (scripts.some((script) => script.enabled)) { @@ -2082,10 +2102,21 @@ export class AgentToolManager { ) } + private normalizeActiveSkillOption(activeSkillNames?: string[]): string[] { + return Array.from( + new Set( + (activeSkillNames ?? []) + .map((skillName) => skillName.trim()) + .filter((skillName) => skillName.length > 0) + ) + ) + } + private async callSkillTool( toolName: string, args: Record, - conversationId?: string + conversationId?: string, + options?: AgentToolExecutionOptions ): Promise { if (!this.isSkillsEnabled()) { return { @@ -2114,20 +2145,15 @@ export class AgentToolManager { ? validationResult.data.file_path.trim() : '' const isLinkedFileView = normalizedFilePath.length > 0 - const previousActiveSkills = - conversationId && !isLinkedFileView - ? await this.getSkillPresenter().getActiveSkills(conversationId) - : [] + const effectiveActiveSkills = this.normalizeActiveSkillOption(options?.activeSkillNames) const result = await skillTools.handleSkillView(conversationId, validationResult.data) - const nextActiveSkills = - conversationId && !isLinkedFileView - ? 
await this.getSkillPresenter().getActiveSkills(conversationId) - : previousActiveSkills + const normalizedViewedSkill = result.name?.trim() || validationResult.data.name.trim() const activationApplied = Boolean(conversationId) && + result.success === true && !isLinkedFileView && - !previousActiveSkills.includes(validationResult.data.name) && - nextActiveSkills.includes(validationResult.data.name) + Boolean(normalizedViewedSkill) && + !effectiveActiveSkills.includes(normalizedViewedSkill) const activationSource = !conversationId || result.success !== true ? 'none' @@ -2136,7 +2162,17 @@ export class AgentToolManager { : isLinkedFileView ? 'file' : 'none' - const content = JSON.stringify(result) + const content = JSON.stringify({ + ...result, + isPinned: result.isPinned === true, + activeForCurrentMessage: + result.isPinned === true || + (!isLinkedFileView && + Boolean(normalizedViewedSkill) && + (activationApplied || effectiveActiveSkills.includes(normalizedViewedSkill))), + activatedForMessage: activationApplied, + activationScope: activationApplied ? 'message' : 'none' + }) return { content, @@ -2145,7 +2181,7 @@ export class AgentToolManager { toolResult: { activationApplied, activationSource, - ...(activationApplied ? { activatedSkill: validationResult.data.name } : {}) + ...(activationApplied ? { activatedSkill: normalizedViewedSkill } : {}) } } } @@ -2193,7 +2229,8 @@ export class AgentToolManager { private async callSkillExecutionTool( toolName: string, args: Record, - conversationId?: string + conversationId?: string, + options?: AgentToolExecutionOptions ): Promise { if (toolName !== 'skill_run') { throw new Error(`Unknown skill execution tool: ${toolName}`) @@ -2209,7 +2246,8 @@ export class AgentToolManager { } const result = await this.getSkillExecutionService().execute(validationResult.data, { - conversationId + conversationId, + activeSkillNames: options?.activeSkillNames }) const content = typeof result.output === 'string' ? 
result.output : JSON.stringify(result.output, null, 2) diff --git a/src/main/presenter/toolPresenter/agentTools/chatSettingsTools.ts b/src/main/presenter/toolPresenter/agentTools/chatSettingsTools.ts index 3d94f69a5..acce6cc34 100644 --- a/src/main/presenter/toolPresenter/agentTools/chatSettingsTools.ts +++ b/src/main/presenter/toolPresenter/agentTools/chatSettingsTools.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { zodToJsonSchema } from 'zod-to-json-schema' +import { toDeepChatJsonSchema } from '@shared/lib/zodJsonSchema' import type { ApplyChatSettingResult, ChatSettingValue, @@ -49,40 +49,32 @@ const SUPPORTED_THEMES = ['dark', 'light', 'system'] as const const FONT_SIZE_LEVELS = [0, 1, 2, 3, 4] as const -const toggleSchema = z - .object({ - setting: z.enum(['copyWithCotEnabled']).describe('Toggle setting id.'), - enabled: z.boolean().describe('Enable or disable the setting.') - }) - .strict() - -const languageSchema = z - .object({ - language: z.enum(SUPPORTED_LANGUAGES).describe('DeepChat language/locale.') - }) - .strict() - -const themeSchema = z - .object({ - theme: z.enum(SUPPORTED_THEMES).describe('Theme mode for DeepChat.') - }) - .strict() - -const fontSizeSchema = z - .object({ - level: z - .union( - FONT_SIZE_LEVELS.map((value) => z.literal(value)) as [ - z.ZodLiteral<0>, - z.ZodLiteral<1>, - z.ZodLiteral<2>, - z.ZodLiteral<3>, - z.ZodLiteral<4> - ] - ) - .describe('Font size level (0-4).') - }) - .strict() +const toggleSchema = z.strictObject({ + setting: z.enum(['copyWithCotEnabled']).describe('Toggle setting id.'), + enabled: z.boolean().describe('Enable or disable the setting.') +}) + +const languageSchema = z.strictObject({ + language: z.enum(SUPPORTED_LANGUAGES).describe('DeepChat language/locale.') +}) + +const themeSchema = z.strictObject({ + theme: z.enum(SUPPORTED_THEMES).describe('Theme mode for DeepChat.') +}) + +const fontSizeSchema = z.strictObject({ + level: z + .union( + FONT_SIZE_LEVELS.map((value) => z.literal(value)) as [ + 
z.ZodLiteral<0>, + z.ZodLiteral<1>, + z.ZodLiteral<2>, + z.ZodLiteral<3>, + z.ZodLiteral<4> + ] + ) + .describe('Font size level (0-4).') +}) const SECTION_ALIASES: Record = { appearance: 'display', @@ -129,11 +121,9 @@ const OPEN_SECTION_ALIASES = [ const OPEN_SECTION_VALUES = [...OPEN_SECTIONS, ...OPEN_SECTION_ALIASES] as const -const openSchema = z - .object({ - section: z.enum([...OPEN_SECTION_VALUES] as [string, ...string[]]).optional() - }) - .strict() +const openSchema = z.strictObject({ + section: z.enum([...OPEN_SECTION_VALUES] as [string, ...string[]]).optional() +}) const SETTINGS_ROUTE_NAMES = { common: 'settings-common', @@ -223,7 +213,7 @@ export class ChatSettingsToolHandler { const parsed = toggleSchema.safeParse(raw) if (!parsed.success) { - return buildError('invalid_request', 'Invalid toggle request.', parsed.error.flatten()) + return buildError('invalid_request', 'Invalid toggle request.', z.flattenError(parsed.error)) } const { setting, enabled } = parsed.data @@ -263,7 +253,11 @@ export class ChatSettingsToolHandler { const parsed = languageSchema.safeParse(raw) if (!parsed.success) { - return buildError('invalid_request', 'Invalid language request.', parsed.error.flatten()) + return buildError( + 'invalid_request', + 'Invalid language request.', + z.flattenError(parsed.error) + ) } const { language } = parsed.data @@ -295,7 +289,7 @@ export class ChatSettingsToolHandler { const parsed = themeSchema.safeParse(raw) if (!parsed.success) { - return buildError('invalid_request', 'Invalid theme request.', parsed.error.flatten()) + return buildError('invalid_request', 'Invalid theme request.', z.flattenError(parsed.error)) } const { theme } = parsed.data @@ -327,7 +321,11 @@ export class ChatSettingsToolHandler { const parsed = fontSizeSchema.safeParse(raw) if (!parsed.success) { - return buildError('invalid_request', 'Invalid font size request.', parsed.error.flatten()) + return buildError( + 'invalid_request', + 'Invalid font size request.', + 
z.flattenError(parsed.error) + ) } const { level } = parsed.data @@ -368,7 +366,7 @@ export class ChatSettingsToolHandler { ok: false, errorCode: 'invalid_request', message: 'Invalid settings navigation request.', - details: parsed.error.flatten() + details: z.flattenError(parsed.error) } } @@ -415,7 +413,7 @@ export const buildChatSettingsToolDefinitions = (allowedTools: string[]): MCPToo function: { name: CHAT_SETTINGS_TOOL_NAMES.toggle, description: 'Toggle a DeepChat setting.', - parameters: zodToJsonSchema(toggleSchema) as { + parameters: toDeepChatJsonSchema(toggleSchema) as { type: string properties: Record required?: string[] @@ -435,7 +433,7 @@ export const buildChatSettingsToolDefinitions = (allowedTools: string[]): MCPToo function: { name: CHAT_SETTINGS_TOOL_NAMES.setLanguage, description: 'Set DeepChat language/locale.', - parameters: zodToJsonSchema(languageSchema) as { + parameters: toDeepChatJsonSchema(languageSchema) as { type: string properties: Record required?: string[] @@ -455,7 +453,7 @@ export const buildChatSettingsToolDefinitions = (allowedTools: string[]): MCPToo function: { name: CHAT_SETTINGS_TOOL_NAMES.setTheme, description: 'Set DeepChat theme mode.', - parameters: zodToJsonSchema(themeSchema) as { + parameters: toDeepChatJsonSchema(themeSchema) as { type: string properties: Record required?: string[] @@ -475,7 +473,7 @@ export const buildChatSettingsToolDefinitions = (allowedTools: string[]): MCPToo function: { name: CHAT_SETTINGS_TOOL_NAMES.setFontSize, description: 'Set DeepChat font size level.', - parameters: zodToJsonSchema(fontSizeSchema) as { + parameters: toDeepChatJsonSchema(fontSizeSchema) as { type: string properties: Record required?: string[] @@ -496,7 +494,7 @@ export const buildChatSettingsToolDefinitions = (allowedTools: string[]): MCPToo name: CHAT_SETTINGS_TOOL_NAMES.open, description: 'Open DeepChat settings only when the request cannot be fulfilled via other settings tools; do not call after the change is already 
applied.', - parameters: zodToJsonSchema(openSchema) as { + parameters: toDeepChatJsonSchema(openSchema) as { type: string properties: Record required?: string[] diff --git a/src/main/presenter/toolPresenter/index.ts b/src/main/presenter/toolPresenter/index.ts index 5fee8737f..217bb5196 100644 --- a/src/main/presenter/toolPresenter/index.ts +++ b/src/main/presenter/toolPresenter/index.ts @@ -61,6 +61,7 @@ export interface IToolPresenter { supportsVision?: boolean agentWorkspacePath?: string | null conversationId?: string + activeSkillNames?: string[] }): Promise syncAgentToolContext?(context: { chatMode?: 'agent' | 'acp agent' @@ -72,6 +73,7 @@ export interface IToolPresenter { onProgress?: (update: AgentToolProgressUpdate) => void signal?: AbortSignal permissionMode?: PermissionMode + activeSkillNames?: string[] } ): Promise<{ content: unknown; rawData: MCPToolResponse }> preCheckToolPermission?( @@ -163,6 +165,7 @@ export class ToolPresenter implements IToolPresenter { supportsVision?: boolean agentWorkspacePath?: string | null conversationId?: string + activeSkillNames?: string[] }): Promise { const defs: MCPToolDefinition[] = [] const mapper = this.resolveMapper(context.conversationId) @@ -194,7 +197,8 @@ export class ToolPresenter implements IToolPresenter { chatMode, supportsVision, agentWorkspacePath, - conversationId: context.conversationId + conversationId: context.conversationId, + activeSkillNames: context.activeSkillNames }), 'agent' ) @@ -260,6 +264,7 @@ export class ToolPresenter implements IToolPresenter { onProgress?: (update: AgentToolProgressUpdate) => void signal?: AbortSignal permissionMode?: PermissionMode + activeSkillNames?: string[] } ): Promise<{ content: unknown; rawData: MCPToolResponse }> { const toolName = request.function.name @@ -300,7 +305,8 @@ export class ToolPresenter implements IToolPresenter { toolCallId: request.id, onProgress: options?.onProgress, signal: options?.signal, - allowExternalFileAccess: options?.permissionMode === 
'full_access' + allowExternalFileAccess: options?.permissionMode === 'full_access', + activeSkillNames: options?.activeSkillNames } ) const resolvedResponse = this.resolveAgentToolResponse(response) @@ -613,12 +619,12 @@ export class ToolPresenter implements IToolPresenter { let hasContent = false if (toolNames.has('skill_list')) { - lines.push('- Use `skill_list` to inspect installed skills and pinned status.') + lines.push('- Use `skill_list` to inspect installed skills and manual pin status.') hasContent = true } if (toolNames.has('skill_view')) { lines.push( - '- Use `skill_view` to inspect a skill or one of its linked files before relying on it.' + '- Use `skill_view` to inspect a skill or one of its linked files before relying on it. Root skill views activate the skill for the current message/tool loop only; they do not pin it to the conversation.' ) hasContent = true } @@ -629,7 +635,9 @@ export class ToolPresenter implements IToolPresenter { hasContent = true } if (toolNames.has('skill_run')) { - lines.push('- Use `skill_run` to execute bundled scripts from pinned skills.') + lines.push( + '- Use `skill_run` to execute bundled scripts from skills active in the current message/tool loop.' 
+ ) hasContent = true } diff --git a/src/main/routes/index.ts b/src/main/routes/index.ts index 2c5346a48..ec3ce24c9 100644 --- a/src/main/routes/index.ts +++ b/src/main/routes/index.ts @@ -159,6 +159,8 @@ import { oauthOpenAICodexGetStatusRoute, oauthOpenAICodexLogoutRoute, oauthOpenAICodexStartBrowserLoginRoute, + remoteControlCancelFeishuAuthRoute, + remoteControlCancelFeishuInstallRoute, remoteControlClearChannelPairCodeRoute, remoteControlCreateChannelPairCodeRoute, remoteControlGetChannelBindingsRoute, @@ -173,7 +175,11 @@ import { remoteControlRemoveWeixinIlinkAccountRoute, remoteControlRestartWeixinIlinkAccountRoute, remoteControlSaveChannelSettingsRoute, + remoteControlStartFeishuAuthRoute, + remoteControlStartFeishuInstallRoute, remoteControlStartWeixinIlinkLoginRoute, + remoteControlWaitForFeishuAuthRoute, + remoteControlWaitForFeishuInstallRoute, remoteControlWaitForWeixinIlinkLoginRoute, pluginsDisableRoute, pluginsEnableRoute, @@ -264,25 +270,44 @@ import { skillsGetDirectoryRoute, skillsGetExtensionRoute, skillsGetFolderTreeRoute, + skillsGetSyncConfigRoute, + skillsExecuteSyncDirectoryExportRoute, + skillsExecuteSyncDirectoryImportRoute, + skillsInstallFromGitRoute, skillsInstallFromFolderRoute, skillsInstallFromUrlRoute, skillsInstallFromZipRoute, + skillsListCatalogRoute, skillsListMetadataRoute, skillsListScriptsRoute, skillsOpenFolderRoute, + skillsPreviewSyncDirectoryExportRoute, + skillsPreviewSyncDirectoryImportRoute, skillsReadFileRoute, + skillsScanGitRepoRoute, skillsSaveExtensionRoute, skillsSaveWithExtensionRoute, skillsSetActiveRoute, + skillsSetDisabledRoute, + skillsSetSyncDirectoryRoute, skillsUninstallRoute, skillsUpdateFileRoute, skillSyncAcknowledgeDiscoveriesRoute, + skillSyncExecuteAdoptAgentSkillRoute, skillSyncExecuteExportRoute, skillSyncExecuteImportRoute, + skillSyncExecuteLinkDeepChatSkillsRoute, + skillSyncGetAgentDetailRoute, + skillSyncGetAgentSkillDetailRoute, skillSyncGetNewDiscoveriesRoute, 
skillSyncGetRegisteredToolsRoute, + skillSyncPreviewAdoptAgentSkillRoute, skillSyncPreviewExportRoute, skillSyncPreviewImportRoute, + skillSyncPreviewLinkDeepChatSkillsRoute, + skillSyncRemoveAgentSkillLinkRoute, + skillSyncRepairAgentSkillLinkRoute, + skillSyncScanAgentsRoute, skillSyncScanExternalToolsRoute, syncGetBackupStatusRoute, syncImportRoute, @@ -2980,6 +3005,21 @@ export async function dispatchDeepchatRoute( }) } + case skillsListCatalogRoute.name: { + return await runTrackedRouteTask(runtime, routeName, context, async () => { + skillsListCatalogRoute.input.parse(rawInput) + const skills = await runtime.skillPresenter.getUnifiedSkillCatalog() + return skillsListCatalogRoute.output.parse({ skills }) + }) + } + + case skillsSetDisabledRoute.name: { + const input = skillsSetDisabledRoute.input.parse(rawInput) + await runtime.skillPresenter.setSkillDeepChatDisabled(input.name, input.disabled) + recordSkillUpdatedActivity(runtime, input.name, 'skill-disabled-state') + return skillsSetDisabledRoute.output.parse({ saved: true }) + } + case skillsGetDirectoryRoute.name: { skillsGetDirectoryRoute.input.parse(rawInput) const path = await runtime.skillPresenter.getSkillsDir() @@ -3013,6 +3053,57 @@ export async function dispatchDeepchatRoute( return skillsInstallFromUrlRoute.output.parse({ result }) } + case skillsScanGitRepoRoute.name: { + const input = skillsScanGitRepoRoute.input.parse(rawInput) + const result = await runtime.skillPresenter.scanGitSkillRepo(input.repoUrl) + return skillsScanGitRepoRoute.output.parse({ result }) + } + + case skillsInstallFromGitRoute.name: { + const input = skillsInstallFromGitRoute.input.parse(rawInput) + const results = await runtime.skillPresenter.installSkillsFromGit(input) + if (results.some(didSkillOperationSucceed)) { + recordSkillSettingsActivity(runtime, 'created', 'skill Git source') + } + return skillsInstallFromGitRoute.output.parse({ results }) + } + + case skillsGetSyncConfigRoute.name: { + 
skillsGetSyncConfigRoute.input.parse(rawInput) + const config = await runtime.skillPresenter.getSkillsSyncConfig() + return skillsGetSyncConfigRoute.output.parse({ config }) + } + + case skillsSetSyncDirectoryRoute.name: { + const input = skillsSetSyncDirectoryRoute.input.parse(rawInput) + const config = await runtime.skillPresenter.setSkillsSyncDirectory(input) + return skillsSetSyncDirectoryRoute.output.parse({ config }) + } + + case skillsPreviewSyncDirectoryExportRoute.name: { + const input = skillsPreviewSyncDirectoryExportRoute.input.parse(rawInput) + const preview = await runtime.skillPresenter.previewSyncDirectoryExport(input) + return skillsPreviewSyncDirectoryExportRoute.output.parse({ preview }) + } + + case skillsExecuteSyncDirectoryExportRoute.name: { + const input = skillsExecuteSyncDirectoryExportRoute.input.parse(rawInput) + const result = await runtime.skillPresenter.executeSyncDirectoryExport(input) + return skillsExecuteSyncDirectoryExportRoute.output.parse({ result }) + } + + case skillsPreviewSyncDirectoryImportRoute.name: { + skillsPreviewSyncDirectoryImportRoute.input.parse(rawInput) + const preview = await runtime.skillPresenter.previewSyncDirectoryImport() + return skillsPreviewSyncDirectoryImportRoute.output.parse({ preview }) + } + + case skillsExecuteSyncDirectoryImportRoute.name: { + const input = skillsExecuteSyncDirectoryImportRoute.input.parse(rawInput) + const result = await runtime.skillPresenter.executeSyncDirectoryImport(input) + return skillsExecuteSyncDirectoryImportRoute.output.parse({ result }) + } + case skillsUninstallRoute.name: { const input = skillsUninstallRoute.input.parse(rawInput) const result = await runtime.skillPresenter.uninstallSkill(input.name) @@ -3137,6 +3228,69 @@ export async function dispatchDeepchatRoute( }) } + case skillSyncScanAgentsRoute.name: { + skillSyncScanAgentsRoute.input.parse(rawInput) + return skillSyncScanAgentsRoute.output.parse({ + agents: await runtime.skillSyncPresenter.scanSkillAgents() 
+ }) + } + + case skillSyncGetAgentDetailRoute.name: { + const input = skillSyncGetAgentDetailRoute.input.parse(rawInput) + return skillSyncGetAgentDetailRoute.output.parse({ + agent: await runtime.skillSyncPresenter.scanSkillAgent({ agentId: input.agentId }) + }) + } + + case skillSyncGetAgentSkillDetailRoute.name: { + const input = skillSyncGetAgentSkillDetailRoute.input.parse(rawInput) + return skillSyncGetAgentSkillDetailRoute.output.parse({ + detail: await runtime.skillSyncPresenter.getAgentSkillDetail(input) + }) + } + + case skillSyncPreviewAdoptAgentSkillRoute.name: { + const input = skillSyncPreviewAdoptAgentSkillRoute.input.parse(rawInput) + return skillSyncPreviewAdoptAgentSkillRoute.output.parse({ + preview: await runtime.skillSyncPresenter.previewAdoptAgentSkill(input) + }) + } + + case skillSyncExecuteAdoptAgentSkillRoute.name: { + const input = skillSyncExecuteAdoptAgentSkillRoute.input.parse(rawInput) + return skillSyncExecuteAdoptAgentSkillRoute.output.parse({ + result: await runtime.skillSyncPresenter.executeAdoptAgentSkill(input) + }) + } + + case skillSyncPreviewLinkDeepChatSkillsRoute.name: { + const input = skillSyncPreviewLinkDeepChatSkillsRoute.input.parse(rawInput) + return skillSyncPreviewLinkDeepChatSkillsRoute.output.parse({ + preview: await runtime.skillSyncPresenter.previewLinkDeepChatSkills(input) + }) + } + + case skillSyncExecuteLinkDeepChatSkillsRoute.name: { + const input = skillSyncExecuteLinkDeepChatSkillsRoute.input.parse(rawInput) + return skillSyncExecuteLinkDeepChatSkillsRoute.output.parse({ + result: await runtime.skillSyncPresenter.executeLinkDeepChatSkills(input) + }) + } + + case skillSyncRepairAgentSkillLinkRoute.name: { + const input = skillSyncRepairAgentSkillLinkRoute.input.parse(rawInput) + return skillSyncRepairAgentSkillLinkRoute.output.parse({ + result: await runtime.skillSyncPresenter.repairAgentSkillLink(input) + }) + } + + case skillSyncRemoveAgentSkillLinkRoute.name: { + const input = 
skillSyncRemoveAgentSkillLinkRoute.input.parse(rawInput) + return skillSyncRemoveAgentSkillLinkRoute.output.parse({ + result: await runtime.skillSyncPresenter.removeAgentSkillLink(input) + }) + } + case skillSyncPreviewImportRoute.name: { const input = skillSyncPreviewImportRoute.input.parse(rawInput) return skillSyncPreviewImportRoute.output.parse({ @@ -3543,6 +3697,42 @@ export async function dispatchDeepchatRoute( return remoteControlGetTelegramStatusRoute.output.parse({ status }) } + case remoteControlStartFeishuAuthRoute.name: { + const input = remoteControlStartFeishuAuthRoute.input.parse(rawInput) + const session = await runtime.remoteControlPresenter.startFeishuAuth(input) + return remoteControlStartFeishuAuthRoute.output.parse({ session }) + } + + case remoteControlWaitForFeishuAuthRoute.name: { + const input = remoteControlWaitForFeishuAuthRoute.input.parse(rawInput) + const result = await runtime.remoteControlPresenter.waitForFeishuAuth(input) + return remoteControlWaitForFeishuAuthRoute.output.parse({ result }) + } + + case remoteControlCancelFeishuAuthRoute.name: { + const input = remoteControlCancelFeishuAuthRoute.input.parse(rawInput) + await runtime.remoteControlPresenter.cancelFeishuAuth(input.sessionKey) + return remoteControlCancelFeishuAuthRoute.output.parse({ cancelled: true }) + } + + case remoteControlStartFeishuInstallRoute.name: { + const input = remoteControlStartFeishuInstallRoute.input.parse(rawInput) + const session = await runtime.remoteControlPresenter.startFeishuInstall(input) + return remoteControlStartFeishuInstallRoute.output.parse({ session }) + } + + case remoteControlWaitForFeishuInstallRoute.name: { + const input = remoteControlWaitForFeishuInstallRoute.input.parse(rawInput) + const result = await runtime.remoteControlPresenter.waitForFeishuInstall(input) + return remoteControlWaitForFeishuInstallRoute.output.parse({ result }) + } + + case remoteControlCancelFeishuInstallRoute.name: { + const input = 
remoteControlCancelFeishuInstallRoute.input.parse(rawInput) + await runtime.remoteControlPresenter.cancelFeishuInstall(input.sessionKey) + return remoteControlCancelFeishuInstallRoute.output.parse({ cancelled: true }) + } + case remoteControlGetWeixinIlinkStatusRoute.name: { remoteControlGetWeixinIlinkStatusRoute.input.parse(rawInput) const status = await runtime.remoteControlPresenter.getWeixinIlinkStatus() diff --git a/src/preload/createBridge.ts b/src/preload/createBridge.ts index 1f26e248b..19e2028d2 100644 --- a/src/preload/createBridge.ts +++ b/src/preload/createBridge.ts @@ -95,7 +95,7 @@ export function createBridge(ipcRenderer: IpcRendererLike): DeepchatBridge { routeName, normalizedInput ) - return contract.output.parse(output) + return contract.output.parse(output) as DeepchatRouteOutput }, on( diff --git a/src/renderer/api/RemoteControlClient.ts b/src/renderer/api/RemoteControlClient.ts index dd437e958..0c9da4889 100644 --- a/src/renderer/api/RemoteControlClient.ts +++ b/src/renderer/api/RemoteControlClient.ts @@ -1,5 +1,7 @@ import type { DeepchatBridge } from '@shared/contracts/bridge' import { + remoteControlCancelFeishuAuthRoute, + remoteControlCancelFeishuInstallRoute, remoteControlClearChannelPairCodeRoute, remoteControlCreateChannelPairCodeRoute, remoteControlGetChannelBindingsRoute, @@ -14,7 +16,11 @@ import { remoteControlRemoveWeixinIlinkAccountRoute, remoteControlRestartWeixinIlinkAccountRoute, remoteControlSaveChannelSettingsRoute, + remoteControlStartFeishuAuthRoute, + remoteControlStartFeishuInstallRoute, remoteControlStartWeixinIlinkLoginRoute, + remoteControlWaitForFeishuAuthRoute, + remoteControlWaitForFeishuInstallRoute, remoteControlWaitForWeixinIlinkLoginRoute } from '@shared/contracts/routes' import type { @@ -87,6 +93,39 @@ export function createRemoteControlClient(bridge: DeepchatBridge = getDeepchatBr return result.status } + async function startFeishuAuth(input?: { + brand?: 'feishu' | 'lark' + appId?: string + appSecret?: 
string + redirectUri?: string + }) { + const result = await bridge.invoke(remoteControlStartFeishuAuthRoute.name, input ?? {}) + return result.session + } + + async function waitForFeishuAuth(input: { sessionKey: string; timeoutMs?: number }) { + const result = await bridge.invoke(remoteControlWaitForFeishuAuthRoute.name, input) + return result.result + } + + async function cancelFeishuAuth(sessionKey: string) { + await bridge.invoke(remoteControlCancelFeishuAuthRoute.name, { sessionKey }) + } + + async function startFeishuInstall(input?: { brand?: 'feishu' | 'lark' }) { + const result = await bridge.invoke(remoteControlStartFeishuInstallRoute.name, input ?? {}) + return result.session + } + + async function waitForFeishuInstall(input: { sessionKey: string; timeoutMs?: number }) { + const result = await bridge.invoke(remoteControlWaitForFeishuInstallRoute.name, input) + return result.result + } + + async function cancelFeishuInstall(sessionKey: string) { + await bridge.invoke(remoteControlCancelFeishuInstallRoute.name, { sessionKey }) + } + async function getWeixinIlinkStatus() { const result = await bridge.invoke(remoteControlGetWeixinIlinkStatusRoute.name, {}) return result.status @@ -122,6 +161,12 @@ export function createRemoteControlClient(bridge: DeepchatBridge = getDeepchatBr createChannelPairCode, clearChannelPairCode, getTelegramStatus, + startFeishuAuth, + waitForFeishuAuth, + cancelFeishuAuth, + startFeishuInstall, + waitForFeishuInstall, + cancelFeishuInstall, getWeixinIlinkStatus, startWeixinIlinkLogin, waitForWeixinIlinkLogin, diff --git a/src/renderer/api/SkillClient.ts b/src/renderer/api/SkillClient.ts index 498a849e1..3ab0d3a0d 100644 --- a/src/renderer/api/SkillClient.ts +++ b/src/renderer/api/SkillClient.ts @@ -5,20 +5,36 @@ import { skillsGetDirectoryRoute, skillsGetExtensionRoute, skillsGetFolderTreeRoute, + skillsGetSyncConfigRoute, + skillsExecuteSyncDirectoryExportRoute, + skillsExecuteSyncDirectoryImportRoute, + skillsInstallFromGitRoute, 
skillsInstallFromFolderRoute, skillsInstallFromUrlRoute, skillsInstallFromZipRoute, + skillsListCatalogRoute, skillsListMetadataRoute, skillsListScriptsRoute, skillsOpenFolderRoute, + skillsPreviewSyncDirectoryExportRoute, + skillsPreviewSyncDirectoryImportRoute, skillsReadFileRoute, + skillsScanGitRepoRoute, skillsSaveExtensionRoute, skillsSaveWithExtensionRoute, skillsSetActiveRoute, + skillsSetDisabledRoute, + skillsSetSyncDirectoryRoute, skillsUninstallRoute, skillsUpdateFileRoute } from '@shared/contracts/routes' -import type { SkillExtensionConfig, SkillInstallOptions } from '@shared/types/skill' +import type { + GitSkillInstallInput, + SkillExtensionConfig, + SkillInstallOptions, + SkillSyncDirectoryExportInput, + SkillSyncDirectoryImportInput +} from '@shared/types/skill' import { getDeepchatBridge } from './core' export function createSkillClient(bridge: DeepchatBridge = getDeepchatBridge()) { @@ -27,6 +43,11 @@ export function createSkillClient(bridge: DeepchatBridge = getDeepchatBridge()) return result.skills } + async function getUnifiedSkillCatalog() { + const result = await bridge.invoke(skillsListCatalogRoute.name, {}) + return result.skills + } + async function getSkillsDir() { const result = await bridge.invoke(skillsGetDirectoryRoute.name, {}) return result.path @@ -56,6 +77,46 @@ export function createSkillClient(bridge: DeepchatBridge = getDeepchatBridge()) return result.result } + async function scanGitSkillRepo(repoUrl: string) { + const result = await bridge.invoke(skillsScanGitRepoRoute.name, { repoUrl }) + return result.result + } + + async function installFromGit(input: GitSkillInstallInput) { + const result = await bridge.invoke(skillsInstallFromGitRoute.name, input) + return result.results + } + + async function getSkillsSyncConfig() { + const result = await bridge.invoke(skillsGetSyncConfigRoute.name, {}) + return result.config + } + + async function setSkillsSyncDirectory(skillsDirectory: string) { + const result = await 
bridge.invoke(skillsSetSyncDirectoryRoute.name, { skillsDirectory }) + return result.config + } + + async function previewSyncDirectoryExport(input: SkillSyncDirectoryExportInput) { + const result = await bridge.invoke(skillsPreviewSyncDirectoryExportRoute.name, input) + return result.preview + } + + async function executeSyncDirectoryExport(input: SkillSyncDirectoryExportInput) { + const result = await bridge.invoke(skillsExecuteSyncDirectoryExportRoute.name, input) + return result.result + } + + async function previewSyncDirectoryImport() { + const result = await bridge.invoke(skillsPreviewSyncDirectoryImportRoute.name, {}) + return result.preview + } + + async function executeSyncDirectoryImport(input: SkillSyncDirectoryImportInput) { + const result = await bridge.invoke(skillsExecuteSyncDirectoryImportRoute.name, input) + return result.result + } + async function uninstallSkill(name: string) { const result = await bridge.invoke(skillsUninstallRoute.name, { name }) return result.result @@ -102,6 +163,10 @@ export function createSkillClient(bridge: DeepchatBridge = getDeepchatBridge()) await bridge.invoke(skillsSaveExtensionRoute.name, { name, config }) } + async function setSkillDisabled(name: string, disabled: boolean) { + await bridge.invoke(skillsSetDisabledRoute.name, { name, disabled }) + } + async function listSkillScripts(name: string) { const result = await bridge.invoke(skillsListScriptsRoute.name, { name }) return result.scripts @@ -122,7 +187,15 @@ export function createSkillClient(bridge: DeepchatBridge = getDeepchatBridge()) function onCatalogChanged( listener: (payload: { - reason: 'discovered' | 'installed' | 'uninstalled' | 'metadata-updated' + reason: + | 'discovered' + | 'installed' + | 'uninstalled' + | 'metadata-updated' + | 'disabled-updated' + | 'management-state-updated' + | 'git-installed' + | 'sync-directory-updated' name?: string version: number }) => void @@ -143,10 +216,19 @@ export function createSkillClient(bridge: DeepchatBridge = 
getDeepchatBridge()) return { getMetadataList, + getUnifiedSkillCatalog, getSkillsDir, installFromFolder, installFromZip, installFromUrl, + scanGitSkillRepo, + installFromGit, + getSkillsSyncConfig, + setSkillsSyncDirectory, + previewSyncDirectoryExport, + executeSyncDirectoryExport, + previewSyncDirectoryImport, + executeSyncDirectoryImport, uninstallSkill, readSkillFile, updateSkillFile, @@ -155,6 +237,7 @@ export function createSkillClient(bridge: DeepchatBridge = getDeepchatBridge()) openSkillsFolder, getSkillExtension, saveSkillExtension, + setSkillDisabled, listSkillScripts, getActiveSkills, setActiveSkills, diff --git a/src/renderer/api/SkillSyncClient.ts b/src/renderer/api/SkillSyncClient.ts index 6e984fd58..86e452376 100644 --- a/src/renderer/api/SkillSyncClient.ts +++ b/src/renderer/api/SkillSyncClient.ts @@ -13,21 +13,41 @@ import { import { type DeepchatRouteInput, skillSyncAcknowledgeDiscoveriesRoute, + skillSyncExecuteAdoptAgentSkillRoute, skillSyncExecuteExportRoute, skillSyncExecuteImportRoute, + skillSyncExecuteLinkDeepChatSkillsRoute, + skillSyncGetAgentDetailRoute, + skillSyncGetAgentSkillDetailRoute, skillSyncGetNewDiscoveriesRoute, skillSyncGetRegisteredToolsRoute, + skillSyncPreviewAdoptAgentSkillRoute, skillSyncPreviewExportRoute, skillSyncPreviewImportRoute, + skillSyncPreviewLinkDeepChatSkillsRoute, + skillSyncRemoveAgentSkillLinkRoute, + skillSyncRepairAgentSkillLinkRoute, + skillSyncScanAgentsRoute, skillSyncScanExternalToolsRoute } from '@shared/contracts/routes' import type { + AgentSkillLinkInput, ConflictStrategy, + AdoptAgentSkillInput, + AdoptAgentSkillPreview, + AdoptAgentSkillResult, ExportPreview, ExternalToolConfig, ImportPreview, + InstalledSkillAgent, + InstalledSkillAgentDetail, + LinkDeepChatSkillResult, + LinkDeepChatSkillsInput, + LinkDeepChatSkillsPreview, + LinkDeepChatSkillsResult, NewDiscovery, ScanResult, + SkillDetail, SyncResult } from '@shared/types/skillSync' import { getDeepchatBridge } from './core' @@ -53,6 
+73,66 @@ export function createSkillSyncClient(bridge: DeepchatBridge = getDeepchatBridge return result.tools as ExternalToolConfig[] } + async function scanAgents(): Promise { + const result = await bridge.invoke(skillSyncScanAgentsRoute.name, {}) + return result.agents as InstalledSkillAgent[] + } + + async function getAgentDetail(agentId: string): Promise { + const result = await bridge.invoke(skillSyncGetAgentDetailRoute.name, { agentId }) + return result.agent as InstalledSkillAgentDetail + } + + async function getAgentSkillDetail(agentId: string, skillName: string): Promise { + const result = await bridge.invoke(skillSyncGetAgentSkillDetailRoute.name, { + agentId, + skillName + }) + return result.detail as SkillDetail + } + + async function previewAdoptAgentSkill( + input: AdoptAgentSkillInput + ): Promise { + const result = await bridge.invoke(skillSyncPreviewAdoptAgentSkillRoute.name, input) + return result.preview as AdoptAgentSkillPreview + } + + async function executeAdoptAgentSkill( + input: AdoptAgentSkillInput + ): Promise { + const result = await bridge.invoke(skillSyncExecuteAdoptAgentSkillRoute.name, input) + return result.result as AdoptAgentSkillResult + } + + async function previewLinkDeepChatSkills( + input: LinkDeepChatSkillsInput + ): Promise { + const result = await bridge.invoke(skillSyncPreviewLinkDeepChatSkillsRoute.name, input) + return result.preview as LinkDeepChatSkillsPreview + } + + async function executeLinkDeepChatSkills( + input: LinkDeepChatSkillsInput + ): Promise { + const result = await bridge.invoke(skillSyncExecuteLinkDeepChatSkillsRoute.name, input) + return result.result as LinkDeepChatSkillsResult + } + + async function repairAgentSkillLink( + input: AgentSkillLinkInput + ): Promise { + const result = await bridge.invoke(skillSyncRepairAgentSkillLinkRoute.name, input) + return result.result as LinkDeepChatSkillResult + } + + async function removeAgentSkillLink( + input: AgentSkillLinkInput + ): Promise { + const result 
= await bridge.invoke(skillSyncRemoveAgentSkillLinkRoute.name, input) + return result.result as LinkDeepChatSkillResult + } + async function previewImport(toolId: string, skillNames: string[]): Promise { const result = await bridge.invoke(skillSyncPreviewImportRoute.name, { toolId, @@ -163,6 +243,15 @@ export function createSkillSyncClient(bridge: DeepchatBridge = getDeepchatBridge getNewDiscoveries, acknowledgeDiscoveries, getRegisteredTools, + scanAgents, + getAgentDetail, + getAgentSkillDetail, + previewAdoptAgentSkill, + executeAdoptAgentSkill, + previewLinkDeepChatSkills, + executeLinkDeepChatSkills, + repairAgentSkillLink, + removeAgentSkillLink, previewImport, executeImport, previewExport, diff --git a/src/renderer/settings/components/McpSettings.vue b/src/renderer/settings/components/McpSettings.vue index b0f925ac6..39bad89f9 100644 --- a/src/renderer/settings/components/McpSettings.vue +++ b/src/renderer/settings/components/McpSettings.vue @@ -497,15 +497,23 @@ const closeMarketView = async () => { const nextQuery = { ...route.query } delete nextQuery.view + const routeName = + typeof router.hasRoute === 'function' && router.hasRoute('plugins-mcp') + ? 'plugins-mcp' + : 'settings-mcp' await router.replace({ - name: 'settings-mcp', + name: routeName, query: nextQuery }) } const openMarketView = async () => { + const routeName = + typeof router.hasRoute === 'function' && router.hasRoute('plugins-mcp') + ? 
'plugins-mcp' + : 'settings-mcp' await router.push({ - name: 'settings-mcp', + name: routeName, query: { ...route.query, view: 'market' diff --git a/src/renderer/settings/components/RemoteSettings.vue b/src/renderer/settings/components/RemoteSettings.vue index 3579dc5ac..59f94dde9 100644 --- a/src/renderer/settings/components/RemoteSettings.vue +++ b/src/renderer/settings/components/RemoteSettings.vue @@ -1,5 +1,5 @@ diff --git a/src/renderer/settings/components/SettingsOverview.vue b/src/renderer/settings/components/SettingsOverview.vue index 6ac4a0035..0693bfbaf 100644 --- a/src/renderer/settings/components/SettingsOverview.vue +++ b/src/renderer/settings/components/SettingsOverview.vue @@ -41,18 +41,6 @@ interactive @select="openRoute('settings-provider')" /> - + + + + + {{ + preview?.conflict + ? t('settings.skills.agents.adoptDialog.conflictTitle') + : t('settings.skills.agents.adoptDialog.adoptTitle') + }} + + + {{ + preview?.conflict + ? t('settings.skills.agents.adoptDialog.conflictDescription', { + skill: preview.skillName, + agent: preview.agentName + }) + : t('settings.skills.agents.adoptDialog.adoptDescription') + }} + + + +
+ + {{ t('settings.skills.agents.adoptDialog.loading') }} +
+ +
+
+ {{ t('settings.skills.agents.adoptDialog.previewFailed') }} +
+
{{ error }}
+
+ +
+
+
+
+ {{ preview.skillName }} +
+
+ {{ preview.agentName }} +
+
+ + {{ t('settings.skills.agents.status.conflict') }} + +
+ +
+
+
+ {{ t('settings.skills.agents.adoptDialog.currentLocation') }} +
+
+ {{ preview.sourcePath }} +
+
+ +
+
+ {{ t('settings.skills.agents.adoptDialog.afterAdoption') }} +
+
+
+ {{ preview.targetPath }} +
+
+ {{ preview.agentPath }} {{ t('settings.skills.agents.adoptDialog.linkArrow') }} +
+
+
+ +
+
+ {{ t('settings.skills.agents.adoptDialog.backup') }} +
+
+ {{ preview.backupRoot }} +
+
+
+ +
+
+ + {{ t('settings.skills.agents.adoptDialog.chooseAction') }} +
+ +
+ + +
+
+ + +
+
+ + +
+
+

+ {{ t('settings.skills.agents.adoptDialog.unsupportedStrategies') }} +

+
+ +
+
+ {{ t('settings.skills.agents.adoptDialog.warnings') }} +
+
+ + {{ warning }} +
+
+
+ + + + + +
+ + + + diff --git a/src/renderer/settings/components/skills/AgentSkillTable.vue b/src/renderer/settings/components/skills/AgentSkillTable.vue new file mode 100644 index 000000000..11f7ac2c3 --- /dev/null +++ b/src/renderer/settings/components/skills/AgentSkillTable.vue @@ -0,0 +1,131 @@ + + + diff --git a/src/renderer/settings/components/skills/InstallFromGitDialog.vue b/src/renderer/settings/components/skills/InstallFromGitDialog.vue new file mode 100644 index 000000000..3be781e13 --- /dev/null +++ b/src/renderer/settings/components/skills/InstallFromGitDialog.vue @@ -0,0 +1,251 @@ + + + diff --git a/src/renderer/settings/components/skills/InstallSkillToAgentDialog.vue b/src/renderer/settings/components/skills/InstallSkillToAgentDialog.vue new file mode 100644 index 000000000..2c575d00c --- /dev/null +++ b/src/renderer/settings/components/skills/InstallSkillToAgentDialog.vue @@ -0,0 +1,305 @@ + + + diff --git a/src/renderer/settings/components/skills/SkillAgentsTab.vue b/src/renderer/settings/components/skills/SkillAgentsTab.vue new file mode 100644 index 000000000..5e89e3bdf --- /dev/null +++ b/src/renderer/settings/components/skills/SkillAgentsTab.vue @@ -0,0 +1,351 @@ + + + diff --git a/src/renderer/settings/components/skills/SkillCard.vue b/src/renderer/settings/components/skills/SkillCard.vue index 177d79b8c..b9c5beb49 100644 --- a/src/renderer/settings/components/skills/SkillCard.vue +++ b/src/renderer/settings/components/skills/SkillCard.vue @@ -1,8 +1,11 @@ diff --git a/src/renderer/settings/components/skills/SkillEditorSheet.vue b/src/renderer/settings/components/skills/SkillEditorSheet.vue deleted file mode 100644 index ec2a37e94..000000000 --- a/src/renderer/settings/components/skills/SkillEditorSheet.vue +++ /dev/null @@ -1,566 +0,0 @@ -