continuedev
diff --git a/CONTRIBUTING.md
-5 b/CONTRIBUTING.md
-5
diff --git a/core/.eslintrc.json
+1-1 b/core/.eslintrc.json
+1-1
diff --git a/core/context/providers/DocsContextProvider.ts
+9-45 b/core/context/providers/DocsContextProvider.ts
+9-45
diff --git a/core/index.d.ts
-2 b/core/index.d.ts
-2
diff --git a/core/indexing/docs/DocsCache.test.ts
+25 b/core/indexing/docs/DocsCache.test.ts
+25
diff --git a/core/indexing/docs/DocsCache.ts
+83 b/core/indexing/docs/DocsCache.ts
+83
@@ -26,7 +26,6 @@
     - [Writing Context Providers](#writing-context-providers)
     - [Adding an LLM Provider](#adding-an-llm-provider)
     - [Adding Models](#adding-models)
-    - [Adding Pre-indexed Documentation](#adding-pre-indexed-documentation)
   - [📐 Continue Architecture](#-continue-architecture)
     - [Continue VS Code Extension](#continue-vs-code-extension)
     - [Continue JetBrains Extension](#continue-jetbrains-extension)
@@ -212,10 +211,6 @@ While any model that works with a supported provider can be used with Continue,
 - LLM Providers: Since many providers use their own custom strings to identify models, you'll have to add the translation from Continue's model name (the one you added to `index.d.ts`) and the model string for each of these providers: [Ollama](./core/llm/llms/Ollama.ts), [Together](./core/llm/llms/Together.ts), and [Replicate](./core/llm/llms/Replicate.ts). You can find their full model lists here: [Ollama](https://ollama.ai/library), [Together](https://docs.together.ai/docs/inference-models), [Replicate](https://replicate.com/collections/streaming-language-models).
 - [Prompt Templates](./core/llm/index.ts) - In this file you'll find the `autodetectTemplateType` function. Make sure that for the model name you just added, this function returns the correct template type. This is assuming that the chat template for that model is already built in Continue. If not, you will have to add the template type and corresponding edit and chat templates.
 
-### Adding Pre-indexed Documentation
-
-Continue's @docs context provider lets you easily reference entire documentation sites and then uses embeddings to add the most relevant pages to context. To make the experience as smooth as possible, we pre-index many of the most popular documentation sites. If you'd like to add new documentation to this list, just add an object to the list in [preIndexedDocs.ts](./core/indexing/docs/preIndexedDocs.ts). `startUrl` is where the crawler will start and `rootUrl` will filter out any pages not on that site and under the path of `rootUrl`.
-
 ## 📐 Continue Architecture
 
 Continue consists of 2 parts that are split so that it can be extended to work in other IDEs as easily as possible:
 
@@ -14,7 +14,7 @@
     "eqeqeq": "error",
     "complexity": ["error", { "max": 38 }],
     "max-lines-per-function": ["error", { "max": 996 }],
-    "max-statements": ["error", { "max": 112 }],
+    "max-statements": ["error", { "max": 114 }],
     "max-depth": ["error", { "max": 6 }],
     "max-nested-callbacks": ["error", { "max": 4 }],
     "max-params": ["error", { "max": 11 }]
 
@@ -9,7 +9,6 @@ import {
   LoadSubmenuItemsArgs,
 } from "../..";
 import DocsService from "../../indexing/docs/DocsService";
-import preIndexedDocs from "../../indexing/docs/preIndexedDocs";
 
 import { INSTRUCTIONS_BASE_ITEM } from "./utils";
 
@@ -64,23 +63,12 @@ class DocsContextProvider extends BaseContextProvider {
     return chunksCopy;
   }
 
-  private _sortByPreIndexedDocs(
+  private _sortAlphabetically(
     submenuItems: ContextSubmenuItem[],
   ): ContextSubmenuItem[] {
-    // Sort submenuItems such that the objects with titles which don't occur in configs occur first, and alphabetized
+    // Sort submenu items alphabetically by title
     return submenuItems.sort((a, b) => {
-      const aTitleInConfigs = a.metadata?.preIndexed ?? false;
-      const bTitleInConfigs = b.metadata?.preIndexed ?? false;
-
-      // Primary criterion: Items not in configs come first
-      if (!aTitleInConfigs && bTitleInConfigs) {
-        return -1;
-      } else if (aTitleInConfigs && !bTitleInConfigs) {
-        return 1;
-      } else {
-        // Secondary criterion: Alphabetical order when both items are in the same category
-        return a.title.toString().localeCompare(b.title.toString());
-      }
+      return a.title.toString().localeCompare(b.title.toString());
     });
   }
 
@@ -165,46 +153,22 @@ class DocsContextProvider extends BaseContextProvider {
     }
     await docsService.isInitialized;
 
-    // Create map of docs url -> submenu item
-    const submenuItemsMap = new Map<string, ContextSubmenuItem>();
+    // Create an array to hold submenu items
+    const submenuItems: ContextSubmenuItem[] = [];
 
-    // Add custom docs from config
+    // Get all indexed docs from the database
     const docs = (await docsService.listMetadata()) ?? [];
     for (const { startUrl, title, favicon } of docs) {
-      submenuItemsMap.set(startUrl, {
+      submenuItems.push({
         title,
         id: startUrl,
         description: new URL(startUrl).hostname,
         icon: favicon,
       });
     }
 
-    // Add pre-indexed docs if supported
-    const canUsePreindexedDocs = await docsService.canUsePreindexedDocs();
-    if (canUsePreindexedDocs) {
-      for (const { startUrl, title } of Object.values(preIndexedDocs)) {
-        // Skip if overridden in config
-        if (docs.find((d) => d.startUrl === startUrl)) {
-          continue;
-        }
-        submenuItemsMap.set(startUrl, {
-          title,
-          id: startUrl,
-          description: new URL(startUrl).hostname,
-          metadata: {
-            preIndexed: true,
-          },
-        });
-      }
-    }
-
-    // Create array and sort if pre-indexed is supported
-    const submenuItems = Array.from(submenuItemsMap.values());
-    if (canUsePreindexedDocs) {
-      return this._sortByPreIndexedDocs(submenuItems);
-    }
-
-    return submenuItems;
+    // Sort alphabetically
+    return this._sortAlphabetically(submenuItems);
   }
 }
 
 
@@ -236,15 +236,13 @@ export interface SiteIndexingConfig {
   maxDepth?: number;
   faviconUrl?: string;
   useLocalCrawling?: boolean;
-  rootUrl?: string; // Currently only used by preindexed docs
 }
 
 export interface DocsIndexingDetails {
   startUrl: string;
   config: SiteIndexingConfig;
   indexingStatus: IndexingStatus | undefined;
   chunks: Chunk[];
-  isPreIndexedDoc: boolean;
 }
 
 export interface IContextProvider {
 
@@ -0,0 +1,25 @@
+import OpenAI from "../../llm/llms/OpenAI";
+import { DocsCache } from "./DocsCache"; // sibling module under test
+
+describe("DocsCache", () => {
+  let openAIEmbeddings: OpenAI;
+
+  beforeAll(() => {
+    openAIEmbeddings = new OpenAI({
+      apiKey: "", // presumably no request is sent by this test, so an empty key suffices — TODO confirm
+      model: "text-embedding-ada-002",
+    });
+  });
+
+  test("normalizeEmbeddingId() produces a valid ID without constructor name", async () => {
+    // Normalize the embedding ID exposed by the OpenAI provider instance
+    const embeddingId = DocsCache.normalizeEmbeddingId(
+      openAIEmbeddings.embeddingId,
+    );
+
+    // Only "<model>::<maxEmbeddingChunkSize>" should remain after the leading segment is stripped
+    expect(embeddingId).toEqual(
+      `${openAIEmbeddings.model}::${openAIEmbeddings.maxEmbeddingChunkSize}`,
+    );
+  });
+});
@@ -0,0 +1,83 @@
+import request from "request";
+import { Chunk } from "../..";
+
+export interface SiteIndexingResults {
+  chunks: (Chunk & { embedding: number[] })[];
+  url: string;
+  title: string;
+}
+
+export class DocsCache {
+  static readonly AWS_REGION: string = "us-west-1";
+  static readonly BUCKET_NAME: string = "continue-preindexed-docs";
+
+  /**
+   * Drops the leading "::"-separated segment (the provider's constructor
+   * name) from an embedding ID, so the result no longer varies by provider.
+   * An ID that contains no "::" is returned unchanged.
+   */
+  static normalizeEmbeddingId(embeddingId: string): string {
+    // Segment 0 is the constructor name; everything after it is kept
+    const parts = embeddingId.split("::");
+    if (parts.length <= 1) return embeddingId; // No "::" separator: nothing to strip
+
+    // Rejoin every remaining segment with "::" (there may be more than one)
+    return parts.slice(1).join("::");
+  }
+
+  /**
+   * Builds the cache path "<normalized embedding ID>/<url with "/" replaced by "_", then URI-encoded>"
+   */
+  static getFilepathForEmbeddingIdAndUrl(
+    embeddingId: string,
+    url: string,
+  ): string {
+    const normalizedEmbeddingId = DocsCache.normalizeEmbeddingId(embeddingId);
+    const normalizedUrl = encodeURIComponent(url.replace(/\//g, "_"));
+    return normalizedEmbeddingId + "/" + normalizedUrl;
+  }
+
+  /**
+   * Builds the https URL of filepath in BUCKET_NAME / AWS_REGION, URI-encoding each "/"-separated segment
+   */
+  private static getS3Url(filepath: string): string {
+    const pathname = filepath.split("/").map(encodeURIComponent).join("/");
+    return `https://${this.BUCKET_NAME}.s3.${this.AWS_REGION}.amazonaws.com/${pathname}`;
+  }
+
+  /**
+   * Downloads the cached site indexing results for an embedding ID and URL from the S3 bucket
+   * @param embeddingId The embedding ID; it is normalized before the S3 path is built
+   * @param url The docs URL whose cache entry is requested
+   * @returns The response body accumulated as a string; rejects on a non-200 status or a request error
+   */
+  static async getDocsCacheForUrl(
+    embeddingId: string,
+    url: string,
+  ): Promise<string> {
+    const filepath = DocsCache.getFilepathForEmbeddingIdAndUrl(
+      embeddingId,
+      url,
+    );
+
+    return new Promise<string>((resolve, reject) => {
+      let data = "";
+      const url = this.getS3Url(filepath); // shadows the url parameter with the S3 object URL
+      const download = request({
+        url,
+      });
+
+      download.on("response", (response: any) => {
+        if (response.statusCode !== 200) {
+          reject(
+            new Error("There was an error retrieving the pre-indexed doc"),
+          );
+        }
+      });
+
+      download.on("error", (err: any) => reject(err));
+      download.on("data", (chunk: any) => (data += chunk));
+      download.on("end", () => resolve(data));
+    });
+  }
+}
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -236,15 +236,13 @@ export interface SiteIndexingConfig {` |
| `236` | `236` | `maxDepth?: number;` |
| `237` | `237` | `faviconUrl?: string;` |
| `238` | `238` | `useLocalCrawling?: boolean;` |
| `239` | | `- rootUrl?: string; // Currently only used by preindexed docs` |
| `240` | `239` | `}` |
| `241` | `240` | |
| `242` | `241` | `export interface DocsIndexingDetails {` |
| `243` | `242` | `startUrl: string;` |
| `244` | `243` | `config: SiteIndexingConfig;` |
| `245` | `244` | `indexingStatus: IndexingStatus \| undefined;` |
| `246` | `245` | `chunks: Chunk[];` |
| `247` | | `- isPreIndexedDoc: boolean;` |
| `248` | `246` | `}` |
| `249` | `247` | |
| `250` | `248` | `export interface IContextProvider {` |