diff --git a/docs/docs.json b/docs/docs.json
index 86b73cf527..cf7d0a3949 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -312,6 +312,7 @@
"group": "Python guides",
"pages": [
"guides/python/python-image-processing",
+ "guides/python/python-doc-to-markdown",
"guides/python/python-crawl4ai",
"guides/python/python-pdf-form-extractor"
]
diff --git a/docs/guides/introduction.mdx b/docs/guides/introduction.mdx
index add0c353c1..9a7e7761eb 100644
--- a/docs/guides/introduction.mdx
+++ b/docs/guides/introduction.mdx
@@ -29,6 +29,7 @@ Get set up fast using our detailed walk-through guides.
| [Cursor rules](/guides/cursor-rules) | Use Cursor rules to help write Trigger.dev tasks |
| [Prisma](/guides/frameworks/prisma) | How to setup Prisma with Trigger.dev |
| [Python image processing](/guides/python/python-image-processing) | Use Python and Pillow to process images |
+| [Python document to markdown](/guides/python/python-doc-to-markdown) | Use Python and MarkItDown to convert documents to markdown |
| [Python PDF form extractor](/guides/python/python-pdf-form-extractor) | Use Python, PyMuPDF and Trigger.dev to extract data from a PDF form |
| [Python web crawler](/guides/python/python-crawl4ai) | Use Python, Crawl4AI and Playwright to create a headless web crawler |
| [Sequin database triggers](/guides/frameworks/sequin) | Trigger tasks from database changes using Sequin |
diff --git a/docs/guides/python/python-crawl4ai.mdx b/docs/guides/python/python-crawl4ai.mdx
index c821bf6c98..526d0eaf58 100644
--- a/docs/guides/python/python-crawl4ai.mdx
+++ b/docs/guides/python/python-crawl4ai.mdx
@@ -1,6 +1,6 @@
---
title: "Python headless browser web crawler example"
-sidebarTitle: "Python headless web crawler"
+sidebarTitle: "Headless web crawler"
description: "Learn how to use Python, Crawl4AI and Playwright to create a headless browser web crawler with Trigger.dev."
---
diff --git a/docs/guides/python/python-doc-to-markdown.mdx b/docs/guides/python/python-doc-to-markdown.mdx
new file mode 100644
index 0000000000..a36a240cec
--- /dev/null
+++ b/docs/guides/python/python-doc-to-markdown.mdx
@@ -0,0 +1,224 @@
+---
+title: "Convert documents to markdown using Python and MarkItDown"
+sidebarTitle: "Convert docs to markdown"
+description: "Learn how to use Trigger.dev with Python to convert documents to markdown using MarkItDown."
+---
+
+import PythonLearnMore from "/snippets/python-learn-more.mdx";
+
+
+ This project uses Trigger.dev v4 (which is currently in beta as of 28 April 2025). If you want to
+ run this project you will need to [upgrade to v4](/upgrade-to-v4).
+
+
+## Overview
+
+Convert documents to markdown using Microsoft's [MarkItDown](https://github.com/microsoft/markitdown) library. This can be especially useful for preparing documents in a structured format for AI applications.
+
+## Prerequisites
+
+- A project with [Trigger.dev initialized](/quick-start)
+- [Python](https://www.python.org/) installed on your local machine. _This example requires Python 3.10 or higher._
+
+## Features
+
+- A Trigger.dev task which downloads a document from a URL and runs the Python script which converts it to markdown
+- A Python script to convert documents to markdown using Microsoft's [MarkItDown](https://github.com/microsoft/markitdown) library
+- Uses our [Python build extension](/config/extensions/pythonExtension) to install dependencies and run Python scripts
+
+## GitHub repo
+
+
+ Click here to view the full code for this project in our examples repository on GitHub. You can
+ fork it and use it as a starting point for your own project.
+
+
+## The code
+
+### Build configuration
+
+After you've initialized your project with Trigger.dev, add these build settings to your `trigger.config.ts` file:
+
+```ts trigger.config.ts
+import { pythonExtension } from "@trigger.dev/python/extension";
+import { defineConfig } from "@trigger.dev/sdk/v3";
+
+// Trigger.dev project configuration that registers the Python build extension
+export default defineConfig({
+ runtime: "node",
+ // Your project ref, copied from the Trigger.dev dashboard
+ project: "",
+ // Your other config settings...
+ build: {
+ extensions: [
+ pythonExtension({
+ // The path to your requirements.txt file
+ requirementsFile: "./requirements.txt",
+ // The path to the Python binary inside your `venv` virtual environment
+ devPythonBinaryPath: `venv/bin/python`,
+ // The paths to your Python scripts to run
+ scripts: ["src/python/**/*.py"],
+ }),
+ ],
+ },
+});
+```
+
+
+ Learn more about executing scripts in your Trigger.dev project using our Python build extension
+ [here](/config/extensions/pythonExtension).
+
+
+### Task code
+
+This task downloads the document from the given URL to a temporary file, then uses the `python.runScript` method to run the `markdown-converter.py` script with the temporary file's path as an argument.
+
+```ts src/trigger/convertToMarkdown.ts
+import { task } from "@trigger.dev/sdk/v3";
+import { python } from "@trigger.dev/python";
+import * as fs from "fs";
+import * as path from "path";
+import * as os from "os";
+
+/**
+ * Downloads the document at `payload.url` to a temporary file, converts it to
+ * markdown with the `markdown-converter.py` script, and removes the temporary file.
+ *
+ * @returns `{ url, markdown, error, success }`. `markdown` is set when the
+ * conversion succeeds; otherwise `error` describes what went wrong.
+ */
+export const convertToMarkdown = task({
+  id: "convert-to-markdown",
+  run: async (payload: { url: string }) => {
+    const { url } = payload;
+
+    // STEP 1: Create temporary file with unique name
+    const tempDir = os.tmpdir();
+    const fileName = `doc-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`;
+    const urlPath = new URL(url).pathname;
+    // Keep the URL's file extension; fall back to .docx when the URL has none
+    const extension = path.extname(urlPath) || ".docx";
+    const tempFilePath = path.join(tempDir, `${fileName}${extension}`);
+
+    // STEP 2: Download file from URL
+    const response = await fetch(url);
+    if (!response.ok) {
+      // Don't save an HTTP error page and try to convert it as if it were the document
+      return {
+        url,
+        markdown: null,
+        error: `Failed to download document: ${response.status} ${response.statusText}`,
+        success: false,
+      };
+    }
+    const buffer = await response.arrayBuffer();
+
+    try {
+      await fs.promises.writeFile(tempFilePath, Buffer.from(buffer));
+
+      // STEP 3: Run Python script to convert document to markdown
+      const pythonResult = await python.runScript("./src/python/markdown-converter.py", [
+        JSON.stringify({ file_path: tempFilePath }),
+      ]);
+
+      // STEP 4: Process result
+      if (pythonResult.stdout) {
+        const result = JSON.parse(pythonResult.stdout);
+        return {
+          url,
+          markdown: result.status === "success" ? result.markdown : null,
+          error: result.status === "error" ? result.error : null,
+          success: result.status === "success",
+        };
+      }
+
+      return {
+        url,
+        markdown: null,
+        error: "No output from Python script",
+        success: false,
+      };
+    } finally {
+      // STEP 5: Clean up temporary file, even when the write or the script fails.
+      // Cleanup is best-effort, so a failed delete must not mask the real outcome.
+      await fs.promises.unlink(tempFilePath).catch(() => {});
+    }
+  },
+});
+```
+
+### Add a requirements.txt file
+
+Add the following to your `requirements.txt` file. This is required in Python projects to install the dependencies.
+
+```txt requirements.txt
+markitdown[all]
+```
+
+### The Python script
+
+The Python script uses MarkItDown to convert documents to Markdown format.
+
+```python src/python/markdown-converter.py
+import json
+import sys
+import os
+from markitdown import MarkItDown
+
+def convert_to_markdown(file_path):
+ """Convert a file to markdown format using MarkItDown"""
+ # Check if file exists
+ if not os.path.exists(file_path):
+ raise FileNotFoundError(f"File not found: {file_path}")
+
+ # Initialize MarkItDown
+ md = MarkItDown()
+
+ # Convert the file
+ try:
+ result = md.convert(file_path)
+ return result.text_content
+ except Exception as e:
+ raise Exception(f"Error converting file: {str(e)}")
+
+def process_trigger_task(file_path):
+ """Process a file and convert to markdown"""
+ try:
+ markdown_result = convert_to_markdown(file_path)
+ return {
+ "status": "success",
+ "markdown": markdown_result
+ }
+ except Exception as e:
+ return {
+ "status": "error",
+ "error": str(e)
+ }
+
+if __name__ == "__main__":
+ # Get the file path from command line arguments
+ if len(sys.argv) < 2:
+ print(json.dumps({"status": "error", "error": "No file path provided"}))
+ sys.exit(1)
+
+ try:
+ config = json.loads(sys.argv[1])
+ file_path = config.get("file_path")
+
+ if not file_path:
+ print(json.dumps({"status": "error", "error": "No file path specified in config"}))
+ sys.exit(1)
+
+ result = process_trigger_task(file_path)
+ print(json.dumps(result))
+ except Exception as e:
+ print(json.dumps({"status": "error", "error": str(e)}))
+ sys.exit(1)
+```
+
+## Testing your task
+
+1. Create a virtual environment `python -m venv venv`
+2. Activate the virtual environment. On Mac/Linux: `source venv/bin/activate`; on Windows: `venv\Scripts\activate`
+3. Install the Python dependencies `pip install -r requirements.txt`. _Make sure you have Python 3.10 or higher installed._
+4. Copy the project ref from your [Trigger.dev dashboard](https://cloud.trigger.dev) and add it to the `trigger.config.ts` file.
+5. Run the Trigger.dev CLI `dev` command (it may ask you to authorize the CLI if you haven't already).
+6. Test the task in the dashboard by providing a valid document URL.
+7. Deploy the task to production using the Trigger.dev CLI `deploy` command.
+
+## MarkItDown Conversion Capabilities
+
+- Convert various file formats to Markdown:
+ - Office formats (Word, PowerPoint, Excel)
+ - PDFs
+ - Images (with optional LLM-generated descriptions)
+ - HTML, CSV, JSON, XML
+ - Audio files (with optional transcription)
+ - ZIP archives
+ - And more
+- Preserve document structure (headings, lists, tables, etc.)
+- Handle multiple input methods (file paths, URLs, base64 data)
+- Optional Azure Document Intelligence integration for better PDF and image conversion
+
+<PythonLearnMore />
diff --git a/docs/guides/python/python-image-processing.mdx b/docs/guides/python/python-image-processing.mdx
index 64e73ecdae..0f81d2b54f 100644
--- a/docs/guides/python/python-image-processing.mdx
+++ b/docs/guides/python/python-image-processing.mdx
@@ -1,6 +1,6 @@
---
title: "Python image processing example"
-sidebarTitle: "Python image processing"
+sidebarTitle: "Process images"
description: "Learn how to use Trigger.dev with Python to process images from URLs and upload them to S3."
---
diff --git a/docs/guides/python/python-pdf-form-extractor.mdx b/docs/guides/python/python-pdf-form-extractor.mdx
index a62f0e8dc1..3367ea9baf 100644
--- a/docs/guides/python/python-pdf-form-extractor.mdx
+++ b/docs/guides/python/python-pdf-form-extractor.mdx
@@ -1,6 +1,6 @@
---
title: "Python PDF form extractor example"
-sidebarTitle: "Python PDF form extractor"
+sidebarTitle: "Extract form data from PDFs"
description: "Learn how to use Trigger.dev with Python to extract form data from PDF files."
---