diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index b0dce49..85901ea 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -6,14 +6,16 @@ on:
     branches: [main]
     paths:
       - 'src/mcp/**'
-      - 'src/lib/sentry*'
-      - 'src/evals/workflow.test.ts'
+      - 'src/lib/sentry-api/**'
+      - 'src/evals/**'
+      - '**/*.eval.ts' # a bare '*' never crosses '/', so '*.eval.ts' is root-only
       - '.github/workflows/eval.yml'
   pull_request:
     paths:
       - 'src/mcp/**'
-      - 'src/lib/sentry*'
-      - 'src/evals/workflow.test.ts'
+      - 'src/lib/sentry-api/**'
+      - 'src/evals/**'
+      - '**/*.eval.ts' # a bare '*' never crosses '/', so '*.eval.ts' is root-only
       - '.github/workflows/eval.yml'
 
 jobs:
diff --git a/src/evals/create-project.eval.ts b/src/evals/create-project.eval.ts
new file mode 100644
index 0000000..85e9f5c
--- /dev/null
+++ b/src/evals/create-project.eval.ts
@@ -0,0 +1,18 @@
+import { describeEval } from "vitest-evals";
+import { Factuality, FIXTURES, TaskRunner } from "./utils";
+
+// Eval: ask the model to create a Sentry project and print only slug and DSN.
+// The reply is scored by Factuality against the expected two-line answer.
+describeEval("create-project", {
+  data: async () => [
+    {
+      input: `Create a new project in Sentry for '${FIXTURES.organizationSlug}' called '${FIXTURES.projectSlug}' with the '${FIXTURES.teamSlug}' team. Output **only** the project slug and the SENTRY_DSN in the format of:\n<PROJECT_SLUG>\n<SENTRY_DSN>`,
+      expected:
+        "cloudflare-mcp\nhttps://d20df0a1ab5031c7f3c7edca9c02814d@o4509106732793856.ingest.us.sentry.io/4509109104082945",
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [Factuality()],
+  threshold: 0.6,
+  timeout: 30000,
+});
diff --git a/src/evals/create-team.eval.ts b/src/evals/create-team.eval.ts
new file mode 100644
index 0000000..00f21ed
--- /dev/null
+++ b/src/evals/create-team.eval.ts
@@ -0,0 +1,17 @@
+import { describeEval } from "vitest-evals";
+import { Factuality, FIXTURES, TaskRunner } from "./utils";
+
+// Eval: ask the model to create the team 'the-goats' and reply with its slug.
+// The reply is scored by Factuality against FIXTURES.teamSlug.
+describeEval("create-team", {
+  data: async () => [
+    {
+      input: `Create a new team in Sentry for '${FIXTURES.organizationSlug}' called 'the-goats' response with **only** the team slug and no other text.`,
+      expected: FIXTURES.teamSlug,
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [Factuality()],
+  threshold: 0.6,
+  timeout: 30000,
+});
diff --git a/src/evals/get-issue.eval.ts b/src/evals/get-issue.eval.ts
new file mode 100644
index 0000000..05ef87d
--- /dev/null
+++ b/src/evals/get-issue.eval.ts
@@ -0,0 +1,28 @@
+import { describeEval } from "vitest-evals";
+import { Factuality, FIXTURES, TaskRunner } from "./utils";
+
+// Eval: ask the model to analyze issue REMOTE-MCP-41.
+// The reply is scored by Factuality against the expected markdown summary.
+describeEval("get-issue", {
+  data: async () => [
+    {
+      input: "Analyze issue REMOTE-MCP-41 from Sentry.",
+      expected: [
+        "## REMOTE-MCP-41",
+        "- **Error**: Tool list_organizations is already registered",
+        "- **Issue ID**: REMOTE-MCP-41",
+        "- **Stacktrace**:",
+        "```",
+        "index.js at line 7809:27",
+        '"index.js" at line 8029:24',
+        '"index.js" at line 19631:28',
+        "```",
+        `- **URL**: https://${FIXTURES.organizationSlug}.sentry.io/issues/REMOTE-MCP-41`,
+      ].join("\n"),
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [Factuality()],
+  threshold: 0.6,
+  timeout: 30000,
+});
diff --git a/src/evals/list-issues.eval.ts b/src/evals/list-issues.eval.ts
new file mode 100644
index 0000000..20835f5
--- /dev/null
+++ b/src/evals/list-issues.eval.ts
@@ -0,0 +1,29 @@
+import { describeEval } from "vitest-evals";
+import { Factuality, FIXTURES, TaskRunner } from "./utils";
+
+// Eval: ask the model for common production errors with stacktraces and URLs.
+// The reply is scored by Factuality against the expected REMOTE-MCP-41 summary.
+describeEval("list-issues", {
+  data: async () => [
+    {
+      input:
+        "Can you you give me a list of common production errors messages, with their stacktrace and a url for more information?",
+      expected: [
+        "## REMOTE-MCP-41",
+        "- **Error**: Tool list_organizations is already registered",
+        "- **Issue ID**: REMOTE-MCP-41",
+        "- **Stacktrace**:",
+        "```",
+        "index.js at line 7809:27",
+        '"index.js" at line 8029:24',
+        '"index.js" at line 19631:28',
+        "```",
+        `- **URL**: https://${FIXTURES.organizationSlug}.sentry.io/issues/REMOTE-MCP-41`,
+      ].join("\n"),
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [Factuality()],
+  threshold: 0.6,
+  timeout: 30000,
+});
diff --git a/src/evals/list-organizations.eval.ts b/src/evals/list-organizations.eval.ts
new file mode 100644
index 0000000..8717061
--- /dev/null
+++ b/src/evals/list-organizations.eval.ts
@@ -0,0 +1,17 @@
+import { describeEval } from "vitest-evals";
+import { Factuality, FIXTURES, TaskRunner } from "./utils";
+
+// Eval: ask the model which Sentry organizations are accessible.
+// The reply is scored by Factuality against FIXTURES.organizationSlug.
+describeEval("list-organizations", {
+  data: async () => [
+    {
+      input: `What organizations do I have access to in Sentry`,
+      expected: FIXTURES.organizationSlug,
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [Factuality()],
+  threshold: 0.6,
+  timeout: 30000,
+});
diff --git a/src/evals/list-projects.eval.ts b/src/evals/list-projects.eval.ts
new file mode 100644
index 0000000..c0388d7
--- /dev/null
+++ b/src/evals/list-projects.eval.ts
@@ -0,0 +1,17 @@
+import { describeEval } from "vitest-evals";
+import { Factuality, FIXTURES, TaskRunner } from "./utils";
+
+// Eval: ask the model which projects are accessible in the fixture org.
+// The reply is scored by Factuality against FIXTURES.projectSlug.
+describeEval("list-projects", {
+  data: async () => [
+    {
+      input: `What projects do I have access to in Sentry for '${FIXTURES.organizationSlug}'`,
+      expected: FIXTURES.projectSlug,
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [Factuality()],
+  threshold: 0.6,
+  timeout: 30000,
+});
diff --git a/src/evals/list-teams.eval.ts b/src/evals/list-teams.eval.ts
new file mode 100644
index 0000000..9cd9c5d
--- /dev/null
+++ b/src/evals/list-teams.eval.ts
@@ -0,0 +1,17 @@
+import { describeEval } from "vitest-evals";
+import { Factuality, FIXTURES, TaskRunner } from "./utils";
+
+// Eval: ask the model which teams are accessible in the fixture org; scored by
+// Factuality against FIXTURES.teamSlug. Named after this file, not "workflow".
+describeEval("list-teams", {
+  data: async () => [
+    {
+      input: `What teams do I have access to in Sentry for '${FIXTURES.organizationSlug}'`,
+      expected: FIXTURES.teamSlug,
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [Factuality()],
+  threshold: 0.6,
+  timeout: 30000,
+});
diff --git a/src/evals/utils.ts b/src/evals/utils.ts
index 216afd9..7b8616d 100644
--- a/src/evals/utils.ts
+++ b/src/evals/utils.ts
@@ -1,6 +1,67 @@
-import { generateObject, type LanguageModel } from "ai";
+import { openai } from "@ai-sdk/openai";
+import {
+  experimental_createMCPClient,
+  generateObject,
+  streamText,
+  type LanguageModel,
+} from "ai";
+import { Experimental_StdioMCPTransport } from "ai/mcp-stdio";
 import { z } from "zod";
 
+export const FIXTURES = {
+  organizationSlug: "sentry-mcp-evals", // org named in prompts and issue URLs
+  teamSlug: "the-goats", // expected answer of the team evals
+  projectSlug: "cloudflare-mcp", // expected answer of the project evals
+};
+// Model used by TaskRunner and Factuality when the caller passes none.
+const defaultModel = openai("gpt-4o");
+
+/**
+ * Builds an eval task that answers `input` with `model`, using the tools of an
+ * MCP client connected over stdio to `npm run start:stdio --mocks`. Resolves
+ * with the final text; the client is closed on both success and failure.
+ */
+export function TaskRunner(model: LanguageModel = defaultModel) {
+  return async function TaskRunner(input: string) {
+    const transport = new Experimental_StdioMCPTransport({
+      command: "npm",
+      args: ["run", "start:stdio", "--mocks"],
+      env: { SENTRY_AUTH_TOKEN: process.env.SENTRY_AUTH_TOKEN! },
+    });
+    const mcpClient = await experimental_createMCPClient({ transport });
+
+    try {
+      // Listed inside the try so a failure here still closes the client.
+      const tools = await mcpClient.tools();
+
+      const result = streamText({
+        model,
+        tools,
+        system:
+          "You are an assistant responsible for evaluating the results of calling various tools. Given the user's query, use the tools available to you to answer the question.",
+        prompt: input,
+        maxRetries: 1,
+        maxSteps: 10,
+        experimental_telemetry: { isEnabled: true },
+        onError: (error) => {
+          console.error(error);
+        },
+      });
+
+      for await (const part of result.fullStream) {
+        // Intentionally empty: drain the stream before reading `result.text`.
+      }
+
+      return await result.text;
+    } catch (error) {
+      console.error(error);
+      throw error;
+    } finally {
+      await mcpClient.close();
+    }
+  };
+}
+
 /**
  * A Factuality checker utilizing the `ai` SDK based on the implementation in `autoevals`.
  *
@@ -10,7 +71,7 @@ import { z } from "zod";
  * scorers: [Factuality(openai("gpt-4o"))]
  * ```
  */
-export function Factuality(model: LanguageModel) {
+export function Factuality(model: LanguageModel = defaultModel) {
   return async function Factuality(opts: {
     input: string;
     output: string;
diff --git a/src/evals/workflow.eval.ts b/src/evals/workflow.eval.ts
deleted file mode 100644
index 722a9da..0000000
--- a/src/evals/workflow.eval.ts
+++ /dev/null
@@ -1,119 +0,0 @@
-import { openai } from "@ai-sdk/openai";
-import { experimental_createMCPClient, streamText } from "ai";
-import { Experimental_StdioMCPTransport } from "ai/mcp-stdio";
-import { describeEval } from "vitest-evals";
-import { Factuality } from "./utils";
-
-const model = openai("gpt-4o");
-
-const CONFIG = {
-  organizationSlug: "sentry-mcp-evals",
-  teamSlug: "the-goats",
-  projectSlug: "cloudflare-mcp",
-};
-// TODO: support this in
-describeEval("workflow", {
-  data: async () => {
-    return [
-      {
-        input: `What organizations do I have access to in Sentry`,
-        expected: CONFIG.organizationSlug,
-      },
-      {
-        input: `What teams do I have access to in Sentry for '${CONFIG.organizationSlug}'`,
-        expected: CONFIG.teamSlug,
-      },
-      {
-        input: `What projects do I have access to in Sentry for '${CONFIG.organizationSlug}'`,
-        expected: CONFIG.projectSlug,
-      },
-      {
-        input: `Create a new team in Sentry for '${CONFIG.organizationSlug}' called 'the-goats' response with **only** the team slug and no other text.`,
-        expected: CONFIG.teamSlug,
-      },
-      {
-        input: `Create a new project in Sentry for '${CONFIG.organizationSlug}' called '${CONFIG.projectSlug}' with the '${CONFIG.teamSlug}' team. Output **only** the project slug and the SENTRY_DSN in the format of:\n<PROJECT_SLUG>\n<SENTRY_DSN>`,
-        expected:
-          "cloudflare-mcp\nhttps://d20df0a1ab5031c7f3c7edca9c02814d@o4509106732793856.ingest.us.sentry.io/4509109104082945",
-      },
-      {
-        input:
-          "Can you you give me a list of common production errors messages, with their stacktrace and a url for more information?",
-        expected: [
-          "## REMOTE-MCP-41",
-          "- **Error**: Tool list_organizations is already registered",
-          "- **Issue ID**: REMOTE-MCP-41",
-          "- **Stacktrace**:",
-          "```",
-          "index.js at line 7809:27",
-          '"index.js" at line 8029:24',
-          '"index.js" at line 19631:28',
-          "```",
-          `- **URL**: https://${CONFIG.organizationSlug}.sentry.io/issues/REMOTE-MCP-41`,
-        ].join("\n"),
-      },
-      {
-        input: "Analyze issue REMOTE-MCP-41 from Sentry.",
-        expected: [
-          "## REMOTE-MCP-41",
-          "- **Error**: Tool list_organizations is already registered",
-          "- **Issue ID**: REMOTE-MCP-41",
-          "- **Stacktrace**:",
-          "```",
-          "index.js at line 7809:27",
-          '"index.js" at line 8029:24',
-          '"index.js" at line 19631:28',
-          "```",
-          `- **URL**: https://${CONFIG.organizationSlug}.sentry.io/issues/REMOTE-MCP-41`,
-        ].join("\n"),
-      },
-    ];
-  },
-  task: async (input) => {
-    const transport = new Experimental_StdioMCPTransport({
-      command: "npm",
-      args: ["run", "start:stdio", "--mocks"],
-      env: {
-        SENTRY_AUTH_TOKEN: process.env.SENTRY_AUTH_TOKEN!,
-      },
-    });
-    const mcpClient = await experimental_createMCPClient({
-      transport,
-    });
-
-    const tools = await mcpClient.tools();
-
-    try {
-      const result = streamText({
-        model,
-        tools,
-        system:
-          "You are an assistant responsible for evaluating the results of calling various tools. Given the user's query, use the tools available to you to answer the question.",
-        prompt: input,
-        maxRetries: 1,
-        maxSteps: 10,
-        experimental_telemetry: {
-          isEnabled: true,
-        },
-        onError: (error) => {
-          console.error(error);
-        },
-      });
-
-      for await (const part of result.fullStream) {
-        // console.log(part);
-      }
-
-      return await result.text;
-    } catch (error) {
-      console.error(error);
-      throw error;
-    } finally {
-      await mcpClient.close();
-    }
-  },
-  scorers: [Factuality(model)],
-  // skipIf: () => !process.env.OPENAI_API_KEY,
-  threshold: 0.6,
-  timeout: 30000,
-});