diff --git a/apps/cli/src/commands/results/remote.ts b/apps/cli/src/commands/results/remote.ts index 2e31b6527..e9a298c0e 100644 --- a/apps/cli/src/commands/results/remote.ts +++ b/apps/cli/src/commands/results/remote.ts @@ -496,12 +496,17 @@ export async function ensureRemoteRunAvailable( throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`); } - // On the results branch runs live flat under `runs/` (the branch namespaces - // results), so strip that prefix to recover . - const relativeRunPath = path.posix.relative('runs', path.posix.dirname(relativeManifestPath)); + const relativeRunPath = relativeRunPathFromManifestPath(relativeManifestPath); await materializeGitRun(config.path, relativeRunPath, getResultsStorageRef(config)); } +function relativeRunPathFromManifestPath(relativeManifestPath: string): string { + const manifestDir = path.posix.dirname(relativeManifestPath); + return path.posix.basename(manifestDir) === '.internal' + ? path.posix.dirname(manifestDir) + : manifestDir; +} + export async function maybeAutoExportRunArtifacts( payload: RemoteExportPayload, ): Promise { diff --git a/apps/cli/src/commands/results/serve-file-tree.test.ts b/apps/cli/src/commands/results/serve-file-tree.test.ts index dad66935c..b17ee04a5 100644 --- a/apps/cli/src/commands/results/serve-file-tree.test.ts +++ b/apps/cli/src/commands/results/serve-file-tree.test.ts @@ -40,7 +40,7 @@ function gitTranscriptEntry(prefix: string): ArtifactCatalogEntry { kind: 'transcript', storage: 'git', ref: 'agentv/artifacts/v1', - key: `runs/2026-06-22T01-12-44-924Z/${prefix}/transcript.jsonl`, + key: `2026-06-22T01-12-44-924Z/${prefix}/transcript.jsonl`, }; } diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts index 12e944e35..01853b465 100644 --- a/apps/cli/src/commands/results/serve.ts +++ b/apps/cli/src/commands/results/serve.ts @@ -637,12 +637,18 @@ function manifestRecordSelection( function relativeRunPathFromNormalizedManifestPath(manifestPath: string): string | undefined { const parts = manifestPath.split('/').filter(Boolean); - const runsIndex = parts.lastIndexOf('runs'); const manifestName = parts.at(-1); - if (runsIndex === -1 || !manifestName || !isRunManifestPath(manifestName)) { + if (!manifestName || !isRunManifestPath(manifestName)) { return undefined; } - const runParts = parts.slice(runsIndex + 1, -1); + const manifestDirParts = parts.slice(0, -1); + if (manifestDirParts.at(-1) === '.internal') { + manifestDirParts.pop(); + } + if (manifestDirParts.length === 0 || manifestDirParts[0]?.startsWith('.')) { + return undefined; + } + const runParts = manifestDirParts; return runParts.length > 0 ? runParts.join('/') : undefined; } @@ -669,7 +675,7 @@ function sidecarArtifactKeyForPointer( artifact: ResolvedArtifactPointer, ): string | undefined { const publishedKey = artifact.key ? normalizeArtifactRelativePath(artifact.key) : undefined; - if (publishedKey?.startsWith('runs/')) { + if (publishedKey) { return publishedKey; } if (!artifact.path) { @@ -680,7 +686,7 @@ function sidecarArtifactKeyForPointer( if (!relativeArtifactPath || !relativeRunPath) { return undefined; } - return ['runs', relativeRunPath, relativeArtifactPath].join('/'); + return [relativeRunPath, relativeArtifactPath].join('/'); } async function readSidecarArtifactText( @@ -756,7 +762,7 @@ function displayPathFromArtifactKey(key: string | undefined, runPath: string | u const normalizedKey = key ? normalizeArtifactRelativePath(key) : undefined; if (!normalizedKey) return undefined; if (!runPath) return normalizedKey; - const runPrefix = `runs/${runPath}/`; + const runPrefix = `${runPath}/`; if (!normalizedKey.startsWith(runPrefix)) return normalizedKey; return normalizeArtifactRelativePath(normalizedKey.slice(runPrefix.length)) ?? normalizedKey; } diff --git a/apps/cli/test/commands/results/remote-auto-export.test.ts b/apps/cli/test/commands/results/remote-auto-export.test.ts index 191c9e6d2..7bbf5f938 100644 --- a/apps/cli/test/commands/results/remote-auto-export.test.ts +++ b/apps/cli/test/commands/results/remote-auto-export.test.ts @@ -205,7 +205,7 @@ describe('maybeAutoExportRunArtifacts', () => { expect(status).toBe('published'); expect(git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, rootDir)).toContain( - 'runs/run-001/index.jsonl', + 'run-001/index.jsonl', ); }, 20_000); @@ -227,21 +227,21 @@ describe('maybeAutoExportRunArtifacts', () => { `git --git-dir "${remoteDir}" ls-tree -r --name-only ${resultsBranch}`, rootDir, ); - expect(resultTree).toContain('runs/run-002/index.jsonl'); - expect(resultTree).toContain('runs/run-002/summary.json'); - expect(resultTree).not.toContain('runs/run-002/alpha/trace.json'); - expect(resultTree).not.toContain('runs/run-002/alpha/transcript.jsonl'); + expect(resultTree).toContain('run-002/index.jsonl'); + expect(resultTree).toContain('run-002/summary.json'); + expect(resultTree).not.toContain('run-002/alpha/trace.json'); + expect(resultTree).not.toContain('run-002/alpha/transcript.jsonl'); const index = JSON.parse( - git(`git --git-dir "${remoteDir}" show ${resultsBranch}:runs/run-002/index.jsonl`, rootDir), + git(`git --git-dir "${remoteDir}" show ${resultsBranch}:run-002/index.jsonl`, rootDir), ); expect(index.artifact_pointers).not.toHaveProperty('trace'); - expect(index.artifact_pointers.transcript.key).toBe('runs/run-002/alpha/transcript.jsonl'); + expect(index.artifact_pointers.transcript.key).toBe('run-002/alpha/transcript.jsonl'); const artifactTree = git( `git --git-dir "${remoteDir}" ls-tree -r --name-only ${AGENTV_RESULTS_ARTIFACTS_REF}`, rootDir, ); - expect(artifactTree).not.toContain('runs/run-002/alpha/trace.json'); - expect(artifactTree).toContain('runs/run-002/alpha/transcript.jsonl'); + expect(artifactTree).not.toContain('run-002/alpha/trace.json'); + expect(artifactTree).toContain('run-002/alpha/transcript.jsonl'); }, 20_000); it('returns already_published when the final results branch is already up to date', async () => { @@ -316,8 +316,8 @@ describe('maybeAutoExportRunArtifacts', () => { expect(status).toBe('published'); expect(git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, rootDir)).not.toContain( - 'runs/run-001/index.jsonl', + 'run-001/index.jsonl', ); - expect(git('git ls-tree -r --name-only main', cloneDir)).toContain('runs/run-001/index.jsonl'); + expect(git('git ls-tree -r --name-only main', cloneDir)).toContain('run-001/index.jsonl'); }); }); diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts index 0676c557a..5c1066e1d 100644 --- a/apps/cli/test/commands/results/serve.test.ts +++ b/apps/cli/test/commands/results/serve.test.ts @@ -256,7 +256,7 @@ function writeRemoteRunArtifact( /^(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z$/, '$1T$2:$3:$4.$5Z', ); - const runDir = path.join(cloneDir, 'runs', timestamp); + const runDir = path.join(cloneDir, timestamp); mkdirSync(runDir, { recursive: true }); const records = Array.isArray(resultRecords) ? resultRecords : [resultRecords]; writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(...records)); @@ -297,7 +297,7 @@ function writeDirtyRemoteRunArtifact( /^(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z$/, '$1T$2:$3:$4.$5Z', ); - const runDir = path.join(cloneDir, 'runs', timestamp); + const runDir = path.join(cloneDir, timestamp); mkdirSync(runDir, { recursive: true }); writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(resultRecord)); writeFileSync( @@ -1489,12 +1489,7 @@ describe('serve app', () => { writeResultsConfig(tempDir, { remote: `file://${remoteDir}`, path: cloneDir }); - const runManifestPath = path.join( - cloneDir, - 'runs', - '2026-03-26T11-00-00-000Z', - 'index.jsonl', - ); + const runManifestPath = path.join(cloneDir, '2026-03-26T11-00-00-000Z', 'index.jsonl'); expect(existsSync(runManifestPath)).toBe(false); const app = createApp([], tempDir, tempDir, undefined, { studioDir }); @@ -1893,7 +1888,7 @@ describe('serve app', () => { '2026-03-26T11-00-00-000Z', RESULT_A, ); - git('git add runs && git commit --quiet -m "remote result"', seedDir); + git('git add . && git commit --quiet -m "remote result"', seedDir); git('git push --quiet origin main', seedDir); const app = createApp([], tempDir, tempDir, undefined, { studioDir }); @@ -1918,7 +1913,7 @@ describe('serve app', () => { blocked: false, run_count: 1, }); - expect(existsSync(path.join(cloneDir, 'runs', runId, 'index.jsonl'))).toBe(true); + expect(existsSync(path.join(cloneDir, runId, 'index.jsonl'))).toBe(true); } finally { if (previousHome === undefined) { process.env.AGENTV_HOME = undefined; @@ -1979,7 +1974,7 @@ describe('serve app', () => { run_count: 1, }); expect(git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, tempDir)).toContain( - `runs/${runTimestamp}/index.jsonl`, + `${runTimestamp}/index.jsonl`, ); } finally { if (previousHome === undefined) { @@ -2080,14 +2075,14 @@ describe('serve app', () => { const cloneRunPath = path.join(cloneDir, relativeRunPath); mkdirSync(path.dirname(seedRunPath), { recursive: true }); writeFileSync(seedRunPath, `${JSON.stringify({ ...RESULT_A, score: 0.5 })}\n`); - git('git add runs && git commit --quiet -m "seed run artifact"', seedDir); + git('git add . && git commit --quiet -m "seed run artifact"', seedDir); git('git push --quiet origin main', seedDir); git('git pull --ff-only --quiet', cloneDir); writeFileSync(cloneRunPath, `${JSON.stringify({ ...RESULT_A, score: 0.75 })}\n`); - git('git add runs && git commit --quiet -m "local run edit"', cloneDir); + git('git add . && git commit --quiet -m "local run edit"', cloneDir); writeFileSync(seedRunPath, `${JSON.stringify({ ...RESULT_A, score: 0.25 })}\n`); - git('git add runs && git commit --quiet -m "remote run edit"', seedDir); + git('git add . && git commit --quiet -m "remote run edit"', seedDir); git('git push --quiet origin main', seedDir); git('git fetch --quiet origin --prune', cloneDir); git('git merge origin/main || true', cloneDir); @@ -2156,9 +2151,7 @@ describe('serve app', () => { expect(data.sync_status).toBe('clean'); expect(data.blocked).toBe(false); expect(data.run_count).toBe(1); - expect( - existsSync(path.join(cloneDir, 'runs', '2026-03-26T14-00-00-000Z', 'index.jsonl')), - ).toBe(true); + expect(existsSync(path.join(cloneDir, '2026-03-26T14-00-00-000Z', 'index.jsonl'))).toBe(true); expect(runId).toBe('2026-03-26T14-00-00-000Z'); }, 15000); @@ -2212,7 +2205,7 @@ describe('serve app', () => { blocked: false, run_count: 1, }); - expect(existsSync(path.join(cloneDir, 'runs', runId, 'index.jsonl'))).toBe(true); + expect(existsSync(path.join(cloneDir, runId, 'index.jsonl'))).toBe(true); } finally { if (previousHome === undefined) { process.env.AGENTV_HOME = undefined; @@ -3176,8 +3169,8 @@ describe('serve app', () => { const runId = `remote::${timestamp}`; const transcriptArtifactPath = 'demo/test-greeting/transcript.jsonl'; const traceArtifactPath = 'demo/test-greeting/trace.json'; - const transcriptKey = `runs/${timestamp}/${transcriptArtifactPath}`; - const traceKey = `runs/${timestamp}/${traceArtifactPath}`; + const transcriptKey = `${timestamp}/${transcriptArtifactPath}`; + const traceKey = `${timestamp}/${traceArtifactPath}`; const transcriptJsonl = `${JSON.stringify({ schema_version: 'agentv.transcript.v1', test_id: 'test-greeting', @@ -3197,7 +3190,7 @@ describe('serve app', () => { git(`git switch --quiet --orphan ${resultsBranch}`, seedDir); git('git rm -rf --quiet . 2>/dev/null || true', seedDir); - const runDir = path.join(seedDir, 'runs', timestamp); + const runDir = path.join(seedDir, timestamp); mkdirSync(runDir, { recursive: true }); writeFileSync( path.join(runDir, 'index.jsonl'), @@ -3235,7 +3228,7 @@ describe('serve app', () => { 2, ), ); - git('git add runs && git commit --quiet -m "seed metadata-only results"', seedDir); + git('git add . && git commit --quiet -m "seed metadata-only results"', seedDir); git(`git push --quiet origin HEAD:${resultsBranch}`, seedDir); git(`git switch --quiet --orphan ${AGENTV_RESULTS_ARTIFACTS_REF}`, seedDir); @@ -3245,7 +3238,7 @@ describe('serve app', () => { mkdirSync(path.dirname(transcriptPath), { recursive: true }); writeFileSync(transcriptPath, transcriptJsonl); writeFileSync(tracePath, traceJson); - git('git add runs && git commit --quiet -m "seed artifact sidecars"', seedDir); + git('git add . && git commit --quiet -m "seed artifact sidecars"', seedDir); git(`git push --quiet origin HEAD:${AGENTV_RESULTS_ARTIFACTS_REF}`, seedDir); git('git switch --quiet main', seedDir); diff --git a/apps/dashboard/src/lib/project-sync-status.test.ts b/apps/dashboard/src/lib/project-sync-status.test.ts index d152ccc29..6975bff52 100644 --- a/apps/dashboard/src/lib/project-sync-status.test.ts +++ b/apps/dashboard/src/lib/project-sync-status.test.ts @@ -18,19 +18,19 @@ describe('getProjectSyncView', () => { }); }); - it('surfaces dirty metadata as syncable without reset language', () => { + it('surfaces dirty result artifacts as syncable without reset language', () => { const view = getProjectSyncView({ configured: true, available: true, sync_status: 'dirty', - dirty_paths: ['metadata/runs/demo/tags.json'], + dirty_paths: ['demo-run/summary.json'], auto_push: false, }); expect(view).toMatchObject({ state: 'dirty', label: 'Dirty', - actionLabel: 'Sync Metadata', + actionLabel: 'Sync Results', canSync: true, }); expect(view.nextAction).toContain('no reset'); @@ -182,7 +182,7 @@ describe('buildProjectSyncFeedback', () => { expect(feedback.kind).toBe('success'); expect(feedback.message).toContain( - 'Sync completed: committed pending metadata, pulled remote results, pushed local results.', + 'Sync completed: committed pending results, pulled remote results, pushed local results.', ); }); diff --git a/apps/dashboard/src/lib/project-sync-status.ts b/apps/dashboard/src/lib/project-sync-status.ts index e6e45c229..467be30e3 100644 --- a/apps/dashboard/src/lib/project-sync-status.ts +++ b/apps/dashboard/src/lib/project-sync-status.ts @@ -218,13 +218,13 @@ export function getProjectSyncView( return { state: 'dirty', label: 'Dirty', - actionLabel: 'Sync Metadata', + actionLabel: 'Sync Results', tone: 'warn', - summary: status.block_reason ?? 'Local result metadata has pending edits.', + summary: status.block_reason ?? 'Local result artifacts have pending edits.', nextAction: status.auto_push === true - ? 'Sync Project will commit safe result metadata changes before syncing.' - : 'Review or commit the pending result metadata; no reset will be performed.', + ? 'Sync Project will commit safe result artifact changes before syncing.' + : 'Review or commit the pending result artifacts; no reset will be performed.', canSync: true, }; } @@ -261,7 +261,7 @@ export function getProjectSyncView( label: 'Clean', actionLabel: 'Sync Project', tone: 'good', - summary: 'Local and remote result metadata are in sync.', + summary: 'Local and remote results are in sync.', canSync: true, }; } @@ -332,7 +332,7 @@ export function buildProjectSyncFeedback(status: RemoteStatusResponse): { } const actions = [ - status.commit_created ? 'committed pending metadata' : undefined, + status.commit_created ? 'committed pending results' : undefined, status.pull_performed ? 'pulled remote results' : undefined, status.auto_merged_remote ? 'Merged remote (auto)' : undefined, status.push_performed ? 'pushed local results' : undefined, diff --git a/apps/web/src/content/docs/docs/next/tools/results.mdx b/apps/web/src/content/docs/docs/next/tools/results.mdx index e60ad51e1..64e04be8b 100644 --- a/apps/web/src/content/docs/docs/next/tools/results.mdx +++ b/apps/web/src/content/docs/docs/next/tools/results.mdx @@ -255,7 +255,7 @@ The CLI contract is deliberately narrow: `agentv results` manages local result a Use these supported remote workflows instead: -- **Automatic publishing:** configure `projects[].results` or top-level `results`; new `agentv eval` and `agentv pipeline bench` runs publish completed artifacts after the run completes. Use `results.repo` with `results.path` pointing at the source checkout and `results.branch: agentv/results/v1` to store primary result records on a dedicated branch of the source repo. AgentV never adds or rewrites remotes in an existing checkout; that checkout's `origin` must already point at the repository you want to fetch and push. AgentV reserves `agentv/results/v1` for primary results and `agentv/artifacts/v1` for heavy artifact payloads. When `index.jsonl` rows point trace or transcript payloads at `agentv/artifacts/v1`, automatic publishing stores those bytes on that artifact branch in the same remote and publishes pointer keys such as `runs//`. The configured results branch remains the metadata/control plane (`index.jsonl`, `summary.json`, tags, and pointers) instead of duplicating canonical trace/transcript payload bodies. Local pre-publish run workspaces can still contain those files beside the manifest so local tools keep working. Mutable run tags are stored as `tags.json` with a `tag_revision`; there is no tag event log in the normal results layout. `results.path` without `results.repo` means an existing local Git checkout, distinct from `workspace.repos[].repo`, which is a portable repository identity. Set `auto_push: true` to push after publish. In CI, use `agentv eval run --results-require-push` when push failures should fail that invocation after local artifacts are written. Non-fast-forward result branch pushes never force-push: AgentV auto-merges concurrent remote writes with artifact-aware Git merge drivers (a union driver for the append-only `index.jsonl`, a JSON-union driver for tag overlays) and pushes the merge as a fast-forward, and routes a genuine overlay conflict to a timestamped `agentv/results-sync/...` branch plus a GitHub compare/PR link for a human merge. While an eval is still running, [WIP checkpoints](/docs/tools/wip-checkpoints/) can keep partial run output durable on `agentv/wip/...` branches when auto-push is enabled. +- **Automatic publishing:** configure `projects[].results` or top-level `results`; new `agentv eval` and `agentv pipeline bench` runs publish completed artifacts after the run completes. Use `results.repo` with `results.path` pointing at the source checkout and `results.branch: agentv/results/v1` to store primary result records on a dedicated branch of the source repo. AgentV never adds or rewrites remotes in an existing checkout; that checkout's `origin` must already point at the repository you want to fetch and push. AgentV reserves `agentv/results/v1` for primary results and `agentv/artifacts/v1` for heavy artifact payloads. The results branch stores run bundles at `/` with `summary.json` at the run root and machine files such as the per-run JSONL index under `/.internal/`; cross-run derived catalogs live under `.indexes/`. When index rows point trace or transcript payloads at `agentv/artifacts/v1`, automatic publishing stores those bytes on the artifact branch in the same remote and publishes pointer keys such as `/`. Run tags are read from `summary.json` and index rows; there is no mutable `tags.json` overlay in the published results layout. `results.path` without `results.repo` means an existing local Git checkout, distinct from `workspace.repos[].repo`, which is a portable repository identity. Set `auto_push: true` to push after publish. In CI, use `agentv eval run --results-require-push` when push failures should fail that invocation after local artifacts are written. Non-fast-forward result branch pushes never force-push: AgentV auto-merges concurrent remote writes with artifact-aware Git merge rules for append-only JSONL indexes and pushes the merge as a fast-forward, routing genuine content conflicts to a timestamped `agentv/results-sync/...` branch plus a GitHub compare/PR link for a human merge. While an eval is still running, [WIP checkpoints](/docs/tools/wip-checkpoints/) can keep partial run output durable on `agentv/wip/...` branches when auto-push is enabled. - **Manual Dashboard sync:** run `agentv dashboard`, open the project, and use **Sync Project**. - **Manual API sync:** while Dashboard is running, call `GET /api/projects/:projectId/remote/status` or `POST /api/projects/:projectId/remote/sync` for project-scoped automation. Single-project sessions also expose `GET /api/remote/status` and `POST /api/remote/sync`. - **Git escape hatch:** for advanced recovery, inspect or repair the configured `projects[].results.path` clone with `git` directly, then sync again. diff --git a/apps/web/src/content/docs/docs/v4.42.4/tools/results.mdx b/apps/web/src/content/docs/docs/v4.42.4/tools/results.mdx index 5daf29f01..1c12cb71f 100644 --- a/apps/web/src/content/docs/docs/v4.42.4/tools/results.mdx +++ b/apps/web/src/content/docs/docs/v4.42.4/tools/results.mdx @@ -43,10 +43,10 @@ Examples: ```bash # Generate report.html next to the run manifest -agentv results report .agentv/results/runs/2026-03-14T10-32-00_claude +agentv results report .agentv/results/2026-03-14T10-32-00_claude # Use an explicit output path -agentv results report .agentv/results/runs/2026-03-14T10-32-00_claude/index.jsonl \ +agentv results report .agentv/results/2026-03-14T10-32-00_claude/index.jsonl \ --out ./reports/human-review.html ``` @@ -65,17 +65,17 @@ One minimal publication workflow is: ```bash # 1. Run an eval and sync or copy the run workspace into your public results repo. -agentv eval evals/demo.eval.yaml --output .agentv/results/runs/demo-live +agentv eval evals/demo.eval.yaml --output .agentv/results/demo-live # 2. In the public results repo, render the report into the Pages source directory. -agentv results report .agentv/results/runs/demo-live --out docs/index.html +agentv results report .agentv/results/demo-live --out docs/index.html # 3. Review the generated HTML before publishing. grep -RInE 'sk-[A-Za-z0-9]|Bearer |localhost|127\.0\.0\.1|/home/|/Users/|/tmp/' docs/index.html # 4. Commit the run artifacts and docs/index.html, then enable GitHub Pages # for the repository's docs/ directory or the branch used for Pages. -git add .agentv/results/runs/demo-live docs/index.html README.md +git add .agentv/results/demo-live docs/index.html README.md git commit -m "docs(results): publish static AgentV report" git push ``` @@ -116,10 +116,10 @@ Duplicate policy is explicit: For lightweight terminal workflows: ```bash -agentv results summary .agentv/results/runs/ -agentv results failures .agentv/results/runs/ -agentv results show .agentv/results/runs/ --test-id my-case -agentv results validate .agentv/results/runs/ +agentv results summary .agentv/results/ +agentv results failures .agentv/results/ +agentv results show .agentv/results/ --test-id my-case +agentv results validate .agentv/results/ ``` For a review-centric workflow built around these artifacts, see [Human Review Checkpoint](/docs/v4.42.4/guides/human-review/). diff --git a/packages/core/src/evaluation/results-repo-cache.test.ts b/packages/core/src/evaluation/results-repo-cache.test.ts index 38dce44c0..f41c865bf 100644 --- a/packages/core/src/evaluation/results-repo-cache.test.ts +++ b/packages/core/src/evaluation/results-repo-cache.test.ts @@ -34,10 +34,12 @@ function writeRun( score: number, executionStatus = 'ok', ): void { - const runDir = path.join(repoDir, 'runs', timestamp); + const runDir = path.join(repoDir, timestamp); + const internalDir = path.join(runDir, '.internal'); mkdirSync(runDir, { recursive: true }); + mkdirSync(internalDir, { recursive: true }); writeFileSync( - path.join(runDir, 'index.jsonl'), + path.join(internalDir, 'index.jsonl'), `${JSON.stringify({ timestamp, test_id: `${experiment}-case`, @@ -51,7 +53,7 @@ function writeRun( path.join(runDir, 'summary.json'), `${JSON.stringify( { - manifest_path: 'index.jsonl', + index_path: '.internal/index.jsonl', metadata: { display_name: `${experiment} ${timestamp}`, experiment, @@ -82,7 +84,7 @@ function createResultsRepo(tempRoot: string): string { git(repoDir, ['checkout', '--orphan', RESULTS_REF]); rmSync(path.join(repoDir, 'README.md'), { force: true }); writeRun(repoDir, 'default', '2026-06-28T00-00-00-000Z', 1); - git(repoDir, ['add', 'runs']); + git(repoDir, ['add', '.']); git(repoDir, ['commit', '-m', 'add first run']); return repoDir; } @@ -114,7 +116,7 @@ describe('git results filesystem index cache', () => { const runs = await listGitRunsCached(repoDir, RESULTS_REF); expect(runs).toHaveLength(1); expect(runs[0]?.run_id).toBe('2026-06-28T00-00-00-000Z'); - expect(runs[0]?.summary_path).toBe('runs/2026-06-28T00-00-00-000Z/summary.json'); + expect(runs[0]?.summary_path).toBe('2026-06-28T00-00-00-000Z/summary.json'); const cacheFile = resolveGitResultsIndexCacheFile({ repoDir, @@ -146,7 +148,7 @@ describe('git results filesystem index cache', () => { run_id: 'sentinel', experiment: 'default', timestamp: '2026-06-28T01-00-00-000Z', - manifest_path: 'runs/sentinel/index.jsonl', + manifest_path: 'sentinel/.internal/index.jsonl', display_name: 'from cache', test_count: 1, avg_score: 0.5, @@ -172,7 +174,7 @@ describe('git results filesystem index cache', () => { const firstCommit = await resolveGitRunsRefCommit(repoDir, RESULTS_REF); writeRun(repoDir, 'experiment-a', '2026-06-28T02-00-00-000Z', 0.25); - git(repoDir, ['add', 'runs']); + git(repoDir, ['add', '.']); git(repoDir, ['commit', '-m', 'add second run']); const secondCommit = await resolveGitRunsRefCommit(repoDir, RESULTS_REF); @@ -211,7 +213,7 @@ describe('git results filesystem index cache', () => { it('preserves execution error counts for remote-only list metadata', async () => { const repoDir = createResultsRepo(tempRoot); writeRun(repoDir, 'error-experiment', '2026-06-28T03-00-00-000Z', 0, 'execution_error'); - git(repoDir, ['add', 'runs']); + git(repoDir, ['add', '.']); git(repoDir, ['commit', '-m', 'add execution error run']); const runs = await listGitRunsCached(repoDir, RESULTS_REF); diff --git a/packages/core/src/evaluation/results-repo.ts b/packages/core/src/evaluation/results-repo.ts index 5c77d8024..cd7124142 100644 --- a/packages/core/src/evaluation/results-repo.ts +++ b/packages/core/src/evaluation/results-repo.ts @@ -30,16 +30,16 @@ const execFileAsync = promisify(execFile); // write runs here. This is NOT the on-branch layout — see RESULTS_REPO_RUNS_DIR. const RESULTS_REPO_RESULTS_DIR = '.agentv/results'; // On-branch / results-repo-clone storage layout. The results branch (e.g. -// agentv/results/v1) already namespaces results, so runs are stored flat at -// runs// and the editable tag overlays at metadata/runs// — -// no redundant `.agentv/results/` prefix. -const RESULTS_REPO_RUNS_DIR = 'runs'; -const RESULTS_REPO_METADATA_DIR = 'metadata'; +// agentv/results/v1) already namespaces results, so run bundles are stored at +// the branch root as / with ADR-0017 internals preserved. +const RESULTS_REPO_RUNS_DIR = '.'; +const RESULTS_REPO_INDEXES_DIR = '.indexes'; +const RESULTS_REPO_CACHE_DIR = '.cache'; // Top-level directories AgentV owns on the results branch. The auto-sync // dirty-commit path stages only these so it never touches unrelated repo files. -const RESULTS_REPO_TRACKED_DIRS = [RESULTS_REPO_RUNS_DIR, RESULTS_REPO_METADATA_DIR] as const; +const RESULTS_REPO_TRACKED_DOT_DIRS = [RESULTS_REPO_INDEXES_DIR, RESULTS_REPO_CACHE_DIR] as const; const GIT_RESULTS_INDEX_CACHE_SCHEMA_VERSION = 'agentv.git_results_index_cache.v1'; -const GIT_RESULTS_INDEX_LAYOUT_VERSION = 'agentv.results_repo_runs.v1'; +const GIT_RESULTS_INDEX_LAYOUT_VERSION = 'agentv.results_repo_branch_root.v1'; const FALLBACK_RESULTS_REPO_COMMIT_EMAIL = 'agentv@results-repo'; const FALLBACK_RESULTS_REPO_COMMIT_NAME = 'AgentV Results'; const GIT_COMMIT_IDENTITY_ENV_KEYS = [ @@ -68,135 +68,15 @@ const RESULTS_REPO_GENESIS_MESSAGE = 'chore(results): initialize AgentV results const RESULTS_REPO_GENESIS_DATE = '@0 +0000'; const RESULT_INDEX_FILENAME = 'index.jsonl'; -// Artifact-aware merge config for the AgentV-owned results checkout. These two -// pieces let `git merge` reconcile concurrent result writes automatically so -// results sync never has to force-push (see resolveResultBranchPushConflict): -// - `.gitattributes` (committed on the results branch) maps the append-only -// run index to git's stock `union` driver and the editable JSON overlay to -// our `agentv-json` driver. -// - `merge.agentv-json.driver` (registered once in the checkout's local git -// config) points at a tiny 3-way JSON set/field union script. -// Run bundles under runs//** are uniquely pathed, so a 3-way merge -// never conflicts on them and they need no attribute. +// Artifact-aware merge config for the AgentV-owned results checkout. Concurrent +// writers append to rebuildable cross-run JSONL catalogs and each run's +// per-run JSONL index; git's stock `union` driver can reconcile those appends. +// Run bundles under /** are uniquely pathed, so a 3-way merge usually +// never conflicts on them. const RESULTS_REPO_GITATTRIBUTES_FILE = '.gitattributes'; const RESULTS_REPO_GITATTRIBUTES_CONTENT = `# Managed by AgentV. Artifact-aware merge so results sync never force-pushes. -# Append-only run manifests: union concurrent appends (lines are orthogonal). -index.jsonl merge=union -# Editable run overlay (tags): 3-way JSON set/field union via the -# agentv-json driver; a genuine scalar conflict falls through to a human merge. -metadata/runs/**/*.json merge=agentv-json -`; -const RESULTS_JSON_MERGE_DRIVER_NAME = 'agentv-json'; -// Materialized into the results checkout's git dir and invoked by git as -// `node