diff --git a/.github/skills/new-java-e2e-test-yaml-and-test/SKILL.md b/.github/skills/new-java-e2e-test-yaml-and-test/SKILL.md new file mode 100644 index 000000000..060228d55 --- /dev/null +++ b/.github/skills/new-java-e2e-test-yaml-and-test/SKILL.md @@ -0,0 +1,222 @@ +--- +name: new-java-e2e-test-yaml-and-test +description: "Use this skill when creating a new Java E2E integration test (failsafe IT) that requires a new replay proxy YAML snapshot file in test/snapshots/" +--- + +# Creating a New Java E2E Test with a Replay Proxy YAML Snapshot + +This skill covers the complete workflow for adding a new Java failsafe +integration test backed by a handcrafted YAML snapshot for the replay proxy. + +## Overview + +The Java E2E tests use a **replay proxy** (`test/harness/replayingCapiProxy.ts`) +that intercepts HTTP calls to the Copilot API and returns pre-recorded responses +from YAML snapshot files. This avoids needing real authentication in CI. + +**Key constraint:** Java's `CapiProxy.java` always sets `GITHUB_ACTIONS=true` +(line 104), which forces the replay proxy into read-only mode. You **cannot** +record snapshots by running Java tests — you must handcraft the YAML. 
+ +## Step-by-Step Workflow + +### Step 1: Choose a snapshot category and snapshot base name + +- Category = a directory under `test/snapshots/` (e.g., `system_message_sections`) +- Snapshot base name = the exact filename stem to use (already lowercase/underscore-separated), + e.g., `should_use_replaced_identity_section_in_response` +- Resulting file: `test/snapshots/<category>/<snapshot_base_name>.yaml` + +### Step 2: Create the YAML snapshot file + +The format is: + +```yaml +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: <exact prompt your test sends> + - role: assistant + content: <assistant response to replay> +``` + +**Rules:** +- `${system}` is a placeholder that matches ANY system message content +- `${workdir}` in tool arguments is substituted with the actual temp workDir +- Each conversation entry represents one request-response exchange +- For multi-turn, add multiple conversation entries +- For tool calls, include `tool_calls` on assistant messages and `role: tool` for results +- The user content must **exactly match** what your test sends (after normalization) + +### Step 3: Create the Java IT test class + +Place it in `java/src/test/java/com/github/copilot/` with an `IT` suffix +(e.g., `MyFeatureIT.java`). The failsafe plugin picks up `*IT.java` files. + +**Template:** + +```java +package com.github.copilot; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.concurrent.TimeUnit; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import com.github.copilot.generated.AssistantMessageEvent; +import com.github.copilot.rpc.MessageOptions; +import com.github.copilot.rpc.PermissionHandler; +import com.github.copilot.rpc.SessionConfig; +// ... 
other imports as needed + +class MyFeatureIT { + + private static E2ETestContext ctx; + + @BeforeAll + static void setUp() throws Exception { + ctx = E2ETestContext.create(); + } + + @AfterAll + static void tearDown() throws Exception { + if (ctx != null) { + ctx.close(); + } + } + + @Test + void myTestMethod() throws Exception { + // 1. Configure the proxy to use your snapshot + ctx.configureForTest("my_category", "my_test_method"); + + // 2. Create a client (uses fake token + proxy automatically) + try (CopilotClient client = ctx.createClient()) { + + // 3. Create a session with desired config + CopilotSession session = client.createSession(new SessionConfig() + .setOnPermissionRequest(PermissionHandler.APPROVE_ALL)) + .get(30, TimeUnit.SECONDS); + + try { + // 4. Send the prompt (must match YAML exactly) + AssistantMessageEvent response = session + .sendAndWait(new MessageOptions().setPrompt("Your prompt here"), 60_000) + .get(90, TimeUnit.SECONDS); + + // 5. Assert on the response + assertNotNull(response); + String content = response.getData().content(); + assertTrue(content.contains("expected text")); + } finally { + session.close(); + } + } + } +} +``` + +### Step 4: Verify + +```sh +cd java +mvn spotless:apply +mvn failsafe:integration-test -Dit.test="MyFeatureIT#myTestMethod" -Denforcer.skip=true +``` + +Then run the full build to confirm no regressions: + +```sh +mvn clean verify +``` + +## Key Classes and Files + +| What | Where | +|------|-------| +| Test context (manages proxy, workDir, CLI) | `java/src/test/java/com/github/copilot/E2ETestContext.java` | +| Java proxy wrapper | `java/src/test/java/com/github/copilot/CapiProxy.java` | +| Replay proxy (TypeScript) | `test/harness/replayingCapiProxy.ts` | +| Proxy server entry point | `test/harness/server.ts` | +| Snapshot files | `test/snapshots/<category>/<snapshot_base_name>.yaml` | +| Existing IT tests for reference | `java/src/test/java/com/github/copilot/*IT.java` | + +## How the Proxy Matches Requests + +1. 
The proxy normalizes the incoming request's messages +2. It compares against each conversation in the YAML: + - System message matches if YAML has `${system}` (wildcard) + - User messages are compared by content (exact text match) + - Tool results are compared after normalizing `${workdir}` paths +3. If a match is found, the proxy returns the **next assistant message after the matched request prefix** +4. If no match, in CI mode (`GITHUB_ACTIONS=true`) it errors with "No cached response found" + +## YAML Format for Tool Calls + +If your test involves tool use: + +```yaml +conversations: + # First exchange: model wants to call a tool + - messages: + - role: system + content: ${system} + - role: user + content: Read the file test.txt + - role: assistant + content: I'll read that file. + tool_calls: + - id: toolcall_0 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.txt"}' + # Second exchange: after tool result is provided, model gives final answer + - messages: + - role: system + content: ${system} + - role: user + content: Read the file test.txt + - role: assistant + content: I'll read that file. + tool_calls: + - id: toolcall_0 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.txt"}' + - role: tool + tool_call_id: toolcall_0 + content: "1. Hello world!" + - role: assistant + content: The file test.txt contains "Hello world!" +``` + +**Important:** When the model calls tools like `view`, the CLI actually executes +them locally. The file must exist in the test's workDir. Create it in your test +before sending the prompt: + +```java +Files.writeString(ctx.getWorkDir().resolve("test.txt"), "Hello world!\n"); +``` + +## Common Pitfalls + +1. **Prompt mismatch** — The user content in YAML must exactly match what + `session.sendAndWait(new MessageOptions().setPrompt("..."))` sends. +2. 
**Forgetting `${system}`** — Always use `${system}` for the system role content + unless testing a specific system message matching scenario. +3. **Tool execution** — If the snapshot has the model calling `view` or other + built-in tools, the CLI will actually execute those tools. Files must exist. +4. **Snapshot name parameter** — pass the explicit snapshot base name to + `configureForTest`, e.g., `configureForTest("category", "my_method_name")`. + Do not rely on camelCase-to-snake_case conversion. +5. **Cannot record via Java** — `CapiProxy.java` forces `GITHUB_ACTIONS=true`. + Always handcraft snapshots or use the Node.js proxy directly for recording. diff --git a/.github/skills/new-java-e2e-test-yaml-and-test/examples.md b/.github/skills/new-java-e2e-test-yaml-and-test/examples.md new file mode 100644 index 000000000..af82ef4db --- /dev/null +++ b/.github/skills/new-java-e2e-test-yaml-and-test/examples.md @@ -0,0 +1,177 @@ +# Examples: New Java E2E Test with YAML Snapshot + +## Example 1: Simple single-turn conversation (no tool calls) + +### Snapshot YAML + +File: `test/snapshots/system_message_sections/should_use_replaced_identity_section_in_response.yaml` + +```yaml +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Who are you? + - role: assistant + content: >- + I'm Botanica, your helpful gardening assistant! I'm here to help you + with all things related to plants and gardening. Whether you have + questions about plant care, garden design, soil preparation, pest + management, or anything else in the world of gardening, I'm happy to + help. What would you like to know about plants or gardening today? 
+``` + +### Corresponding Java test method + +```java +@Test +void shouldUseReplacedIdentitySectionInResponse() throws Exception { + ctx.configureForTest("system_message_sections", "should_use_replaced_identity_section_in_response"); + + var systemMessage = new SystemMessageConfig().setMode(SystemMessageMode.CUSTOMIZE) + .setSections(Map.of(SystemMessageSections.IDENTITY, + new SectionOverride().setAction(SectionOverrideAction.REPLACE) + .setContent("You are a helpful gardening assistant called Botanica. " + + "You only answer questions about plants and gardening."))); + + try (CopilotClient client = ctx.createClient()) { + CopilotSession session = client.createSession(new SessionConfig().setSystemMessage(systemMessage) + .setOnPermissionRequest(PermissionHandler.APPROVE_ALL)).get(30, TimeUnit.SECONDS); + + try { + AssistantMessageEvent response = session + .sendAndWait(new MessageOptions().setPrompt("Who are you?"), 60_000).get(90, TimeUnit.SECONDS); + + assertNotNull(response, "Expected a response from the assistant"); + String content = response.getData().content().toLowerCase(); + assertTrue(content.contains("botanica") || content.contains("garden") || content.contains("plant"), + "Expected response to reflect the replaced identity section, but got: " + + response.getData().content()); + } finally { + session.close(); + } + } +} +``` + +**Key points:** +- `configureForTest("system_message_sections", "should_use_replaced_identity_section_in_response")` + maps to `test/snapshots/system_message_sections/should_use_replaced_identity_section_in_response.yaml` +- The prompt `"Who are you?"` exactly matches the YAML's user content +- `ctx.createClient()` uses `fake-token-for-e2e-tests` — works in CI + +--- + +## Example 2: Multi-turn with tool calls (from existing tests) + +### Snapshot YAML + +File: `test/snapshots/system_message_transform/should_invoke_transform_callbacks_with_section_content.yaml` + +```yaml +models: + - claude-sonnet-4.5 +conversations: + # First 
exchange: model decides to call tools + - messages: + - role: system + content: ${system} + - role: user + content: Read the contents of test.txt and tell me what it says + - role: assistant + content: I'll read the test.txt file for you. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Reading test.txt file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.txt"}' + # Second exchange: after tool results come back, model gives final answer + - messages: + - role: system + content: ${system} + - role: user + content: Read the contents of test.txt and tell me what it says + - role: assistant + content: I'll read the test.txt file for you. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Reading test.txt file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.txt"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: 1. Hello transform! + - role: assistant + content: |- + The file test.txt contains: + ``` + Hello transform! 
+ ``` +``` + +### Corresponding Java test method + +```java +@Test +void transformOnIdentitySectionReceivesNonEmptyContent() throws Exception { + ctx.configureForTest("system_message_transform", "should_invoke_transform_callbacks_with_section_content"); + + ConcurrentHashMap<String, String> capturedContent = new ConcurrentHashMap<>(); + + var systemMessage = new SystemMessageConfig().setMode(SystemMessageMode.CUSTOMIZE) + .setSections(Map.of(SystemMessageSections.IDENTITY, new SectionOverride().setTransform(content -> { + capturedContent.put("identity", content); + return CompletableFuture.completedFuture(content); + }), SystemMessageSections.TONE, new SectionOverride().setTransform(content -> { + capturedContent.put("tone", content); + return CompletableFuture.completedFuture(content); + }))); + + try (CopilotClient client = ctx.createClient()) { + // Create the file the snapshot expects the CLI view tool to read + Path testFile = ctx.getWorkDir().resolve("test.txt"); + Files.writeString(testFile, "Hello transform!"); + + CopilotSession session = client.createSession(new SessionConfig().setSystemMessage(systemMessage) + .setOnPermissionRequest(PermissionHandler.APPROVE_ALL)).get(30, TimeUnit.SECONDS); + + try { + AssistantMessageEvent response = session + .sendAndWait(new MessageOptions() + .setPrompt("Read the contents of test.txt and tell me what it says"), 60_000) + .get(90, TimeUnit.SECONDS); + + assertNotNull(response, "Expected a response from the assistant"); + + String identityContent = capturedContent.get("identity"); + assertNotNull(identityContent, "Expected identity transform callback to be invoked"); + assertTrue(!identityContent.isBlank(), "Expected identity section content to be non-empty"); + } finally { + session.close(); + } + } +} +``` + +**Key points:** +- The file `test.txt` must be created in `ctx.getWorkDir()` **before** sending the prompt +- The CLI's `view` tool will actually read that file; the YAML's tool result `"1. 
Hello transform!"` must match what `view` returns for that file content +- Two conversation entries: first for the tool-call decision, second for the final response after tool results diff --git a/java/src/test/java/com/github/copilot/LowLevelToolDefinitionIT.java b/java/src/test/java/com/github/copilot/LowLevelToolDefinitionIT.java new file mode 100644 index 000000000..bc74ca667 --- /dev/null +++ b/java/src/test/java/com/github/copilot/LowLevelToolDefinitionIT.java @@ -0,0 +1,112 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +package com.github.copilot; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import com.github.copilot.generated.AssistantMessageEvent; +import com.github.copilot.rpc.MessageOptions; +import com.github.copilot.rpc.PermissionHandler; +import com.github.copilot.rpc.SessionConfig; +import com.github.copilot.rpc.ToolDefinition; +import com.github.copilot.rpc.ToolSet; + +/** + * Failsafe integration test for explicit (non-ergonomic) tool definition APIs. 
+ * + * @see Snapshot: tools/low_level_tool_definition + */ +class LowLevelToolDefinitionIT { + + private static E2ETestContext ctx; + private String currentPhase; // written by the set_current_phase tool handler, asserted at the end of the test + + record PhaseArgs(String phase) { + } + + @BeforeAll + static void setup() throws Exception { + ctx = E2ETestContext.create(); + } + + @AfterAll + static void teardown() throws Exception { + if (ctx != null) { + ctx.close(); + } + } + + @Test + void lowLevelToolDefinition() throws Exception { + ctx.configureForTest("tools", "low_level_tool_definition"); // snapshot: test/snapshots/tools/low_level_tool_definition.yaml + + Map<String, Object> setPhaseSchema = Map.of("type", "object", "properties", + Map.of("phase", Map.of("type", "string", "enum", List.of("searching", "analyzing", "done"))), + "required", List.of("phase")); + + ToolDefinition setPhaseTool = ToolDefinition.create("set_current_phase", "Sets the current phase of the agent", + setPhaseSchema, invocation -> { + PhaseArgs args = invocation.getArgumentsAs(PhaseArgs.class); + currentPhase = args.phase(); + return CompletableFuture.completedFuture("Phase set to " + currentPhase); + }); + + Map<String, Object> searchSchema = Map.of("type", "object", "properties", + Map.of("keyword", Map.of("type", "string")), "required", List.of("keyword")); + + ToolDefinition searchTool = ToolDefinition.create("search_items", "Search for items by keyword", searchSchema, + invocation -> { + Map<String, Object> args = invocation.getArguments(); + String keyword = (String) args.get("keyword"); + assertTrue("copilot".equals(keyword), "Expected tool keyword to be 'copilot' but was: " + keyword); + return CompletableFuture.completedFuture("Found: item_alpha, item_beta"); + }); + + Map<String, Object> grepSchema = Map.of("type", "object", "properties", + Map.of("query", Map.of("type", "string")), "required", List.of("query")); + + ToolDefinition grepOverrideTool = ToolDefinition.createOverride("grep", "Custom grep override", grepSchema, + invocation -> { + Map<String, Object> args = invocation.getArguments(); + String query = (String) args.get("query"); + return CompletableFuture.completedFuture("CUSTOM_GREP: " 
+ query); + }); + + try (CopilotClient client = ctx.createClient()) { + CopilotSession session = client + .createSession(new SessionConfig().setOnPermissionRequest(PermissionHandler.APPROVE_ALL) + .setAvailableTools(new ToolSet().addCustom("*").addBuiltIn("web_fetch")) + .setTools(List.of(setPhaseTool, searchTool, grepOverrideTool))) + .get(30, TimeUnit.SECONDS); + + try { + AssistantMessageEvent response = session.sendAndWait(new MessageOptions().setPrompt( + "First, set the current phase to 'analyzing'. Then search for items with keyword 'copilot'. Report the phase and search results."), + 60_000).get(90, TimeUnit.SECONDS); + + assertNotNull(response, "Expected a response from the assistant"); + String content = response.getData().content().toLowerCase(); + assertTrue(content.contains("analyzing"), + "Response should contain the updated phase: " + response.getData().content()); + assertTrue(content.contains("item_alpha") || content.contains("item_beta"), + "Response should contain search results: " + response.getData().content()); + assertTrue("analyzing".equals(currentPhase), + "Expected currentPhase to be analyzing but was: " + currentPhase); + } finally { + session.close(); + } + } + } +} diff --git a/test/snapshots/abort/should_abort_during_active_streaming.yaml b/test/snapshots/abort/should_abort_during_active_streaming.yaml index bd18eab2f..ea70c0d53 100644 --- a/test/snapshots/abort/should_abort_during_active_streaming.yaml +++ b/test/snapshots/abort/should_abort_during_active_streaming.yaml @@ -28,3 +28,10 @@ conversations: content: Say 'abort_recovery_ok'. - role: assistant content: abort_recovery_ok + - messages: + - role: system + content: ${system} + - role: user + content: Say 'abort_recovery_ok'. 
+ - role: assistant + content: abort_recovery_ok diff --git a/test/snapshots/tools/low_level_tool_definition.yaml b/test/snapshots/tools/low_level_tool_definition.yaml new file mode 100644 index 000000000..03cb0748a --- /dev/null +++ b/test/snapshots/tools/low_level_tool_definition.yaml @@ -0,0 +1,32 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: First, set the current phase to 'analyzing'. Then search for items with keyword 'copilot'. Report the phase and + search results. + - role: assistant + content: I'll set the phase and run the search now. + tool_calls: + - id: toolcall_0 + type: function + function: + name: set_current_phase + arguments: '{"phase":"analyzing"}' + - id: toolcall_1 + type: function + function: + name: search_items + arguments: '{"keyword":"copilot"}' + - role: tool + tool_call_id: toolcall_0 + content: Phase set to analyzing + - role: tool + tool_call_id: toolcall_1 + content: "Found: item_alpha, item_beta" + - role: assistant + content: |- + Current phase: analyzing + Search results: item_alpha, item_beta