Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions apps/cli/src/commands/results/remote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -496,12 +496,17 @@ export async function ensureRemoteRunAvailable(
throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`);
}

// On the results branch runs live flat under `runs/` (the branch namespaces
// results), so strip that prefix to recover <run_id>.
const relativeRunPath = path.posix.relative('runs', path.posix.dirname(relativeManifestPath));
const relativeRunPath = relativeRunPathFromManifestPath(relativeManifestPath);
await materializeGitRun(config.path, relativeRunPath, getResultsStorageRef(config));
}

/**
 * Resolves the run directory that owns a manifest file.
 *
 * The result is the POSIX directory portion of `relativeManifestPath`. When
 * that directory's final segment is `.internal`, the manifest is treated as
 * living one level below the run root, so the parent directory is returned
 * instead.
 *
 * @param relativeManifestPath - Manifest path using `/` separators.
 * @returns The run directory path (`.` when the manifest has no parent segment).
 */
function relativeRunPathFromManifestPath(relativeManifestPath: string): string {
  const containingDir = path.posix.dirname(relativeManifestPath);
  if (path.posix.basename(containingDir) !== '.internal') {
    // Manifest sits directly in the run directory.
    return containingDir;
  }
  // Manifest sits under `<run>/.internal/`; step up to the run directory.
  return path.posix.dirname(containingDir);
}

export async function maybeAutoExportRunArtifacts(
payload: RemoteExportPayload,
): Promise<RemoteExportStatus> {
Expand Down
2 changes: 1 addition & 1 deletion apps/cli/src/commands/results/serve-file-tree.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function gitTranscriptEntry(prefix: string): ArtifactCatalogEntry {
kind: 'transcript',
storage: 'git',
ref: 'agentv/artifacts/v1',
key: `runs/2026-06-22T01-12-44-924Z/${prefix}/transcript.jsonl`,
key: `2026-06-22T01-12-44-924Z/${prefix}/transcript.jsonl`,
};
}

Expand Down
18 changes: 12 additions & 6 deletions apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -637,12 +637,18 @@ function manifestRecordSelection(

function relativeRunPathFromNormalizedManifestPath(manifestPath: string): string | undefined {
const parts = manifestPath.split('/').filter(Boolean);
const runsIndex = parts.lastIndexOf('runs');
const manifestName = parts.at(-1);
if (runsIndex === -1 || !manifestName || !isRunManifestPath(manifestName)) {
if (!manifestName || !isRunManifestPath(manifestName)) {
return undefined;
}
const runParts = parts.slice(runsIndex + 1, -1);
const manifestDirParts = parts.slice(0, -1);
if (manifestDirParts.at(-1) === '.internal') {
manifestDirParts.pop();
}
if (manifestDirParts.length === 0 || manifestDirParts[0]?.startsWith('.')) {
return undefined;
}
const runParts = manifestDirParts;
return runParts.length > 0 ? runParts.join('/') : undefined;
}

Expand All @@ -669,7 +675,7 @@ function sidecarArtifactKeyForPointer(
artifact: ResolvedArtifactPointer,
): string | undefined {
const publishedKey = artifact.key ? normalizeArtifactRelativePath(artifact.key) : undefined;
if (publishedKey?.startsWith('runs/')) {
if (publishedKey) {
return publishedKey;
}
if (!artifact.path) {
Expand All @@ -680,7 +686,7 @@ function sidecarArtifactKeyForPointer(
if (!relativeArtifactPath || !relativeRunPath) {
return undefined;
}
return ['runs', relativeRunPath, relativeArtifactPath].join('/');
return [relativeRunPath, relativeArtifactPath].join('/');
}

async function readSidecarArtifactText(
Expand Down Expand Up @@ -756,7 +762,7 @@ function displayPathFromArtifactKey(key: string | undefined, runPath: string | u
const normalizedKey = key ? normalizeArtifactRelativePath(key) : undefined;
if (!normalizedKey) return undefined;
if (!runPath) return normalizedKey;
const runPrefix = `runs/${runPath}/`;
const runPrefix = `${runPath}/`;
if (!normalizedKey.startsWith(runPrefix)) return normalizedKey;
return normalizeArtifactRelativePath(normalizedKey.slice(runPrefix.length)) ?? normalizedKey;
}
Expand Down
22 changes: 11 additions & 11 deletions apps/cli/test/commands/results/remote-auto-export.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ describe('maybeAutoExportRunArtifacts', () => {

expect(status).toBe('published');
expect(git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, rootDir)).toContain(
'runs/run-001/index.jsonl',
'run-001/index.jsonl',
);
}, 20_000);

Expand All @@ -227,21 +227,21 @@ describe('maybeAutoExportRunArtifacts', () => {
`git --git-dir "${remoteDir}" ls-tree -r --name-only ${resultsBranch}`,
rootDir,
);
expect(resultTree).toContain('runs/run-002/index.jsonl');
expect(resultTree).toContain('runs/run-002/summary.json');
expect(resultTree).not.toContain('runs/run-002/alpha/trace.json');
expect(resultTree).not.toContain('runs/run-002/alpha/transcript.jsonl');
expect(resultTree).toContain('run-002/index.jsonl');
expect(resultTree).toContain('run-002/summary.json');
expect(resultTree).not.toContain('run-002/alpha/trace.json');
expect(resultTree).not.toContain('run-002/alpha/transcript.jsonl');
const index = JSON.parse(
git(`git --git-dir "${remoteDir}" show ${resultsBranch}:runs/run-002/index.jsonl`, rootDir),
git(`git --git-dir "${remoteDir}" show ${resultsBranch}:run-002/index.jsonl`, rootDir),
);
expect(index.artifact_pointers).not.toHaveProperty('trace');
expect(index.artifact_pointers.transcript.key).toBe('runs/run-002/alpha/transcript.jsonl');
expect(index.artifact_pointers.transcript.key).toBe('run-002/alpha/transcript.jsonl');
const artifactTree = git(
`git --git-dir "${remoteDir}" ls-tree -r --name-only ${AGENTV_RESULTS_ARTIFACTS_REF}`,
rootDir,
);
expect(artifactTree).not.toContain('runs/run-002/alpha/trace.json');
expect(artifactTree).toContain('runs/run-002/alpha/transcript.jsonl');
expect(artifactTree).not.toContain('run-002/alpha/trace.json');
expect(artifactTree).toContain('run-002/alpha/transcript.jsonl');
}, 20_000);

it('returns already_published when the final results branch is already up to date', async () => {
Expand Down Expand Up @@ -316,8 +316,8 @@ describe('maybeAutoExportRunArtifacts', () => {

expect(status).toBe('published');
expect(git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, rootDir)).not.toContain(
'runs/run-001/index.jsonl',
'run-001/index.jsonl',
);
expect(git('git ls-tree -r --name-only main', cloneDir)).toContain('runs/run-001/index.jsonl');
expect(git('git ls-tree -r --name-only main', cloneDir)).toContain('run-001/index.jsonl');
});
});
39 changes: 16 additions & 23 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ function writeRemoteRunArtifact(
/^(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z$/,
'$1T$2:$3:$4.$5Z',
);
const runDir = path.join(cloneDir, 'runs', timestamp);
const runDir = path.join(cloneDir, timestamp);
mkdirSync(runDir, { recursive: true });
const records = Array.isArray(resultRecords) ? resultRecords : [resultRecords];
writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(...records));
Expand Down Expand Up @@ -297,7 +297,7 @@ function writeDirtyRemoteRunArtifact(
/^(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z$/,
'$1T$2:$3:$4.$5Z',
);
const runDir = path.join(cloneDir, 'runs', timestamp);
const runDir = path.join(cloneDir, timestamp);
mkdirSync(runDir, { recursive: true });
writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(resultRecord));
writeFileSync(
Expand Down Expand Up @@ -1489,12 +1489,7 @@ describe('serve app', () => {

writeResultsConfig(tempDir, { remote: `file://${remoteDir}`, path: cloneDir });

const runManifestPath = path.join(
cloneDir,
'runs',
'2026-03-26T11-00-00-000Z',
'index.jsonl',
);
const runManifestPath = path.join(cloneDir, '2026-03-26T11-00-00-000Z', 'index.jsonl');
expect(existsSync(runManifestPath)).toBe(false);

const app = createApp([], tempDir, tempDir, undefined, { studioDir });
Expand Down Expand Up @@ -1893,7 +1888,7 @@ describe('serve app', () => {
'2026-03-26T11-00-00-000Z',
RESULT_A,
);
git('git add runs && git commit --quiet -m "remote result"', seedDir);
git('git add . && git commit --quiet -m "remote result"', seedDir);
git('git push --quiet origin main', seedDir);

const app = createApp([], tempDir, tempDir, undefined, { studioDir });
Expand All @@ -1918,7 +1913,7 @@ describe('serve app', () => {
blocked: false,
run_count: 1,
});
expect(existsSync(path.join(cloneDir, 'runs', runId, 'index.jsonl'))).toBe(true);
expect(existsSync(path.join(cloneDir, runId, 'index.jsonl'))).toBe(true);
} finally {
if (previousHome === undefined) {
process.env.AGENTV_HOME = undefined;
Expand Down Expand Up @@ -1979,7 +1974,7 @@ describe('serve app', () => {
run_count: 1,
});
expect(git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, tempDir)).toContain(
`runs/${runTimestamp}/index.jsonl`,
`${runTimestamp}/index.jsonl`,
);
} finally {
if (previousHome === undefined) {
Expand Down Expand Up @@ -2080,14 +2075,14 @@ describe('serve app', () => {
const cloneRunPath = path.join(cloneDir, relativeRunPath);
mkdirSync(path.dirname(seedRunPath), { recursive: true });
writeFileSync(seedRunPath, `${JSON.stringify({ ...RESULT_A, score: 0.5 })}\n`);
git('git add runs && git commit --quiet -m "seed run artifact"', seedDir);
git('git add . && git commit --quiet -m "seed run artifact"', seedDir);
git('git push --quiet origin main', seedDir);
git('git pull --ff-only --quiet', cloneDir);

writeFileSync(cloneRunPath, `${JSON.stringify({ ...RESULT_A, score: 0.75 })}\n`);
git('git add runs && git commit --quiet -m "local run edit"', cloneDir);
git('git add . && git commit --quiet -m "local run edit"', cloneDir);
writeFileSync(seedRunPath, `${JSON.stringify({ ...RESULT_A, score: 0.25 })}\n`);
git('git add runs && git commit --quiet -m "remote run edit"', seedDir);
git('git add . && git commit --quiet -m "remote run edit"', seedDir);
git('git push --quiet origin main', seedDir);
git('git fetch --quiet origin --prune', cloneDir);
git('git merge origin/main || true', cloneDir);
Expand Down Expand Up @@ -2156,9 +2151,7 @@ describe('serve app', () => {
expect(data.sync_status).toBe('clean');
expect(data.blocked).toBe(false);
expect(data.run_count).toBe(1);
expect(
existsSync(path.join(cloneDir, 'runs', '2026-03-26T14-00-00-000Z', 'index.jsonl')),
).toBe(true);
expect(existsSync(path.join(cloneDir, '2026-03-26T14-00-00-000Z', 'index.jsonl'))).toBe(true);
expect(runId).toBe('2026-03-26T14-00-00-000Z');
}, 15000);

Expand Down Expand Up @@ -2212,7 +2205,7 @@ describe('serve app', () => {
blocked: false,
run_count: 1,
});
expect(existsSync(path.join(cloneDir, 'runs', runId, 'index.jsonl'))).toBe(true);
expect(existsSync(path.join(cloneDir, runId, 'index.jsonl'))).toBe(true);
} finally {
if (previousHome === undefined) {
process.env.AGENTV_HOME = undefined;
Expand Down Expand Up @@ -3176,8 +3169,8 @@ describe('serve app', () => {
const runId = `remote::${timestamp}`;
const transcriptArtifactPath = 'demo/test-greeting/transcript.jsonl';
const traceArtifactPath = 'demo/test-greeting/trace.json';
const transcriptKey = `runs/${timestamp}/${transcriptArtifactPath}`;
const traceKey = `runs/${timestamp}/${traceArtifactPath}`;
const transcriptKey = `${timestamp}/${transcriptArtifactPath}`;
const traceKey = `${timestamp}/${traceArtifactPath}`;
const transcriptJsonl = `${JSON.stringify({
schema_version: 'agentv.transcript.v1',
test_id: 'test-greeting',
Expand All @@ -3197,7 +3190,7 @@ describe('serve app', () => {

git(`git switch --quiet --orphan ${resultsBranch}`, seedDir);
git('git rm -rf --quiet . 2>/dev/null || true', seedDir);
const runDir = path.join(seedDir, 'runs', timestamp);
const runDir = path.join(seedDir, timestamp);
mkdirSync(runDir, { recursive: true });
writeFileSync(
path.join(runDir, 'index.jsonl'),
Expand Down Expand Up @@ -3235,7 +3228,7 @@ describe('serve app', () => {
2,
),
);
git('git add runs && git commit --quiet -m "seed metadata-only results"', seedDir);
git('git add . && git commit --quiet -m "seed metadata-only results"', seedDir);
git(`git push --quiet origin HEAD:${resultsBranch}`, seedDir);

git(`git switch --quiet --orphan ${AGENTV_RESULTS_ARTIFACTS_REF}`, seedDir);
Expand All @@ -3245,7 +3238,7 @@ describe('serve app', () => {
mkdirSync(path.dirname(transcriptPath), { recursive: true });
writeFileSync(transcriptPath, transcriptJsonl);
writeFileSync(tracePath, traceJson);
git('git add runs && git commit --quiet -m "seed artifact sidecars"', seedDir);
git('git add . && git commit --quiet -m "seed artifact sidecars"', seedDir);
git(`git push --quiet origin HEAD:${AGENTV_RESULTS_ARTIFACTS_REF}`, seedDir);
git('git switch --quiet main', seedDir);

Expand Down
8 changes: 4 additions & 4 deletions apps/dashboard/src/lib/project-sync-status.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ describe('getProjectSyncView', () => {
});
});

it('surfaces dirty metadata as syncable without reset language', () => {
it('surfaces dirty result artifacts as syncable without reset language', () => {
const view = getProjectSyncView({
configured: true,
available: true,
sync_status: 'dirty',
dirty_paths: ['metadata/runs/demo/tags.json'],
dirty_paths: ['demo-run/summary.json'],
auto_push: false,
});

expect(view).toMatchObject({
state: 'dirty',
label: 'Dirty',
actionLabel: 'Sync Metadata',
actionLabel: 'Sync Results',
canSync: true,
});
expect(view.nextAction).toContain('no reset');
Expand Down Expand Up @@ -182,7 +182,7 @@ describe('buildProjectSyncFeedback', () => {

expect(feedback.kind).toBe('success');
expect(feedback.message).toContain(
'Sync completed: committed pending metadata, pulled remote results, pushed local results.',
'Sync completed: committed pending results, pulled remote results, pushed local results.',
);
});

Expand Down
12 changes: 6 additions & 6 deletions apps/dashboard/src/lib/project-sync-status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,13 @@ export function getProjectSyncView(
return {
state: 'dirty',
label: 'Dirty',
actionLabel: 'Sync Metadata',
actionLabel: 'Sync Results',
tone: 'warn',
summary: status.block_reason ?? 'Local result metadata has pending edits.',
summary: status.block_reason ?? 'Local result artifacts have pending edits.',
nextAction:
status.auto_push === true
? 'Sync Project will commit safe result metadata changes before syncing.'
: 'Review or commit the pending result metadata; no reset will be performed.',
? 'Sync Project will commit safe result artifact changes before syncing.'
: 'Review or commit the pending result artifacts; no reset will be performed.',
canSync: true,
};
}
Expand Down Expand Up @@ -261,7 +261,7 @@ export function getProjectSyncView(
label: 'Clean',
actionLabel: 'Sync Project',
tone: 'good',
summary: 'Local and remote result metadata are in sync.',
summary: 'Local and remote results are in sync.',
canSync: true,
};
}
Expand Down Expand Up @@ -332,7 +332,7 @@ export function buildProjectSyncFeedback(status: RemoteStatusResponse): {
}

const actions = [
status.commit_created ? 'committed pending metadata' : undefined,
status.commit_created ? 'committed pending results' : undefined,
status.pull_performed ? 'pulled remote results' : undefined,
status.auto_merged_remote ? 'Merged remote (auto)' : undefined,
status.push_performed ? 'pushed local results' : undefined,
Expand Down
2 changes: 1 addition & 1 deletion apps/web/src/content/docs/docs/next/tools/results.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ The CLI contract is deliberately narrow: `agentv results` manages local result a

Use these supported remote workflows instead:

- **Automatic publishing:** configure `projects[].results` or top-level `results`; new `agentv eval` and `agentv pipeline bench` runs publish completed artifacts after the run completes. Use `results.repo` with `results.path` pointing at the source checkout and `results.branch: agentv/results/v1` to store primary result records on a dedicated branch of the source repo. AgentV never adds or rewrites remotes in an existing checkout; that checkout's `origin` must already point at the repository you want to fetch and push. AgentV reserves `agentv/results/v1` for primary results and `agentv/artifacts/v1` for heavy artifact payloads. When `index.jsonl` rows point trace or transcript payloads at `agentv/artifacts/v1`, automatic publishing stores those bytes on that artifact branch in the same remote and publishes pointer keys such as `runs/<run_id>/<pointer.path>`. The configured results branch remains the metadata/control plane (`index.jsonl`, `summary.json`, tags, and pointers) instead of duplicating canonical trace/transcript payload bodies. Local pre-publish run workspaces can still contain those files beside the manifest so local tools keep working. Mutable run tags are stored as `tags.json` with a `tag_revision`; there is no tag event log in the normal results layout. `results.path` without `results.repo` means an existing local Git checkout, distinct from `workspace.repos[].repo`, which is a portable repository identity. Set `auto_push: true` to push after publish. In CI, use `agentv eval run --results-require-push` when push failures should fail that invocation after local artifacts are written. 
Non-fast-forward result branch pushes never force-push: AgentV auto-merges concurrent remote writes with artifact-aware Git merge drivers (a union driver for the append-only `index.jsonl`, a JSON-union driver for tag overlays) and pushes the merge as a fast-forward, and routes a genuine overlay conflict to a timestamped `agentv/results-sync/...` branch plus a GitHub compare/PR link for a human merge. While an eval is still running, [WIP checkpoints](/docs/tools/wip-checkpoints/) can keep partial run output durable on `agentv/wip/...` branches when auto-push is enabled.
- **Automatic publishing:** configure `projects[].results` or top-level `results`; new `agentv eval` and `agentv pipeline bench` runs publish completed artifacts after the run completes. Use `results.repo` with `results.path` pointing at the source checkout and `results.branch: agentv/results/v1` to store primary result records on a dedicated branch of the source repo. AgentV never adds or rewrites remotes in an existing checkout; that checkout's `origin` must already point at the repository you want to fetch and push. AgentV reserves `agentv/results/v1` for primary results and `agentv/artifacts/v1` for heavy artifact payloads. The results branch stores run bundles at `<run_id>/` with `summary.json` at the run root and machine files such as the per-run JSONL index under `<run_id>/.internal/`; cross-run derived catalogs live under `.indexes/`. When index rows point trace or transcript payloads at `agentv/artifacts/v1`, automatic publishing stores those bytes on the artifact branch in the same remote and publishes pointer keys such as `<run_id>/<pointer.path>`. Run tags are read from `summary.json` and index rows; there is no mutable `tags.json` overlay in the published results layout. `results.path` without `results.repo` means an existing local Git checkout, distinct from `workspace.repos[].repo`, which is a portable repository identity. Set `auto_push: true` to push after publish. In CI, use `agentv eval run --results-require-push` when push failures should fail that invocation after local artifacts are written. Non-fast-forward result branch pushes never force-push: AgentV auto-merges concurrent remote writes with artifact-aware Git merge rules for append-only JSONL indexes and pushes the merge as a fast-forward, routing genuine content conflicts to a timestamped `agentv/results-sync/...` branch plus a GitHub compare/PR link for a human merge. 
While an eval is still running, [WIP checkpoints](/docs/tools/wip-checkpoints/) can keep partial run output durable on `agentv/wip/...` branches when auto-push is enabled.
- **Manual Dashboard sync:** run `agentv dashboard`, open the project, and use **Sync Project**.
- **Manual API sync:** while Dashboard is running, call `GET /api/projects/:projectId/remote/status` or `POST /api/projects/:projectId/remote/sync` for project-scoped automation. Single-project sessions also expose `GET /api/remote/status` and `POST /api/remote/sync`.
- **Git escape hatch:** for advanced recovery, inspect or repair the configured `projects[].results.path` clone with `git` directly, then sync again.
Loading
Loading