API v1

Integrate in Minutes

RESTful API for intelligent document extraction. Upload PDFs, get structured JSON back—asynchronously.

Base: https://pdf2text.ai/api/v1

Authentication

All API requests require a Bearer token in the Authorization header. Generate your API key from your account settings.

Get API Key
# Store the API key in an environment variable
export P2T_API_KEY="your_api_key_here"

# Send it as a Bearer token on every request
curl --header "Authorization: Bearer $P2T_API_KEY" ...
import requests
import os

# Read the key from the environment; an unset variable raises KeyError
API_KEY = os.environ["P2T_API_KEY"]
# Authorization header reused by every request below
headers = {"Authorization": "Bearer {}".format(API_KEY)}
// Read the API key from the environment
const API_KEY = process.env.P2T_API_KEY;

// Bearer-token header reused by every request below
const headers = { Authorization: `Bearer ${API_KEY}` };
POST

Upload Documents

Upload one or more PDF files to create a document group. Each upload returns a group ID for tracking.

Endpoint

POST /api/v1/documents/upload/

Parameters

file (binary): PDF file. Required.
grouping_id (uuid): Optional. Adds the file to an existing group.
document_type (string): Optional. Classification hint.
# Upload a PDF as multipart form data
curl --request POST https://pdf2text.ai/api/v1/documents/upload/ \
  --header "Authorization: Bearer $P2T_API_KEY" \
  --form "file=@invoice.pdf"

# Attach another PDF to an existing group
curl --request POST https://pdf2text.ai/api/v1/documents/upload/ \
  --header "Authorization: Bearer $P2T_API_KEY" \
  --form "file=@invoice2.pdf" \
  --form "grouping_id=a1b2c3d4-..."
import requests

url = "https://pdf2text.ai/api/v1/documents/upload/"

# Send one PDF as the multipart "file" field
with open("invoice.pdf", "rb") as pdf:
    response = requests.post(url, headers=headers, files={"file": pdf})

# The response body carries the ID of the document group
data = response.json()
group_id = data["group_id"]
const fs = require('fs');
const FormData = require('form-data');

// Build a multipart body with the PDF under the required `file` field
const form = new FormData();
form.append('file', fs.createReadStream('invoice.pdf'));

const response = await fetch(
  'https://pdf2text.ai/api/v1/documents/upload/',
  {
    method: 'POST',
    // Merge the auth header with the multipart headers generated by the form
    headers: { ...headers, ...form.getHeaders() },
    body: form
  }
);

// Expose the group ID as `groupId`, the name the run-creation snippet reads
const { group_id: groupId } = await response.json();
Response
{
  "status": "success",
  "group_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
  "documents": [
    {
      "id": 42,
      "filename": "invoice.pdf",
      "pages": 3
    }
  ]
}
POST

Start Extraction Run

Kick off asynchronous processing for a document group. Enable auto-routing to automatically classify and extract each document.

Endpoint

POST /api/v1/groups/{group_id}/runs/

Request Body

auto_route (boolean): Auto-classify each document (default: true).
template_id (uuid): Force a specific template.
callback_url (string): Webhook URL.
callback_secret (string): HMAC signing secret.
# Queue an extraction run with auto-routing enabled
curl --request POST https://pdf2text.ai/api/v1/groups/$GROUP_ID/runs/ \
  --header "Authorization: Bearer $P2T_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
    "auto_route": true,
    "callback_url": "https://your-app.com/webhook"
  }'
# Queue an extraction run for the uploaded group
url = f"https://pdf2text.ai/api/v1/groups/{group_id}/runs/"

# JSON request body: auto-route and notify the webhook
payload = {
    "auto_route": True,
    "callback_url": "https://your-app.com/webhook",
}
response = requests.post(url, headers=headers, json=payload)

# Keep the run ID for fetching results later
run_data = response.json()
run_id = run_data["run_id"]
// Start an extraction run for the uploaded group
const response = await fetch(
  `https://pdf2text.ai/api/v1/groups/${groupId}/runs/`,
  {
    method: 'POST',
    headers: {
      ...headers,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      auto_route: true,
      callback_url: 'https://your-app.com/webhook'
    })
  }
);

// Expose the run ID as `runId`, the name the polling snippet passes to pollResults
const { run_id: runId } = await response.json();
Response
{
  "status": "queued",
  "run_id": "run_abc123xyz",
  "message": "Processing started"
}
GET

Fetch Results

Poll the run endpoint or use webhooks to retrieve extraction results. Export as JSON or Excel.

Endpoints

GET /api/runs/{run_id}/
GET /api/runs/{run_id}/export.xlsx

Response Status Codes

200 OK · 202 Accepted · 401 Unauthorized · 404 Not Found
# Check the run's status and results
curl https://pdf2text.ai/api/runs/$RUN_ID/ \
  --header "Authorization: Bearer $P2T_API_KEY"

# Save the Excel export to results.xlsx
curl --output results.xlsx \
  https://pdf2text.ai/api/runs/$RUN_ID/export.xlsx \
  --header "Authorization: Bearer $P2T_API_KEY"
import time

# Poll until complete
while True:
    response = requests.get(
        f"https://pdf2text.ai/api/runs/{run_id}/",
        headers=headers
    )
    # Fail fast on 401/404 (or any other HTTP error) instead of polling on
    response.raise_for_status()
    data = response.json()

    if data["status"] == "completed":
        results = data["results"]
        break

    # NOTE(review): if the API reports a terminal failure status, stop on it
    # here as well -- confirm the possible status values.
    time.sleep(2)  # Wait 2 seconds

# Download Excel export
excel = requests.get(
    f"https://pdf2text.ai/api/runs/{run_id}/export.xlsx",
    headers=headers
)
# Don't write an error body into results.xlsx
excel.raise_for_status()
with open("results.xlsx", "wb") as f:
    f.write(excel.content)
/**
 * Poll a run until its status is 'completed', then return its results.
 *
 * @param {string} runId - Run identifier returned when the run was started.
 * @param {number} [intervalMs=2000] - Delay between polls, in milliseconds.
 * @returns {Promise<*>} The `results` field of the completed run.
 * @throws {Error} If the API answers with a non-2xx status (e.g. 401 or 404).
 */
const pollResults = async (runId, intervalMs = 2000) => {
  while (true) {
    const response = await fetch(
      `https://pdf2text.ai/api/runs/${runId}/`,
      { headers }
    );

    // An error response never reports 'completed'; without this check the
    // loop would keep polling forever.
    if (!response.ok) {
      throw new Error(`Polling run ${runId} failed with HTTP ${response.status}`);
    }

    const data = await response.json();
    if (data.status === 'completed') {
      return data.results;
    }

    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
};

const results = await pollResults(runId);
Response
{
  "status": "completed",
  "run_id": "run_abc123xyz",
  "results": [
    {
      "document_id": 42,
      "template": "invoice",
      "confidence": 0.97,
      "extracted": {
        "vendor": "Acme Corp",
        "total": "$1,234.56",
        "date": "2024-01-15"
      }
    }
  ]
}
GET

Templates

List available extraction templates. Use template IDs to force specific extraction schemas.

Endpoint

GET /api/templates/
Manage Templates
# Fetch the list of extraction templates
curl https://pdf2text.ai/api/templates/ \
  --header "Authorization: Bearer $P2T_API_KEY"
# Fetch the list of extraction templates
response = requests.get("https://pdf2text.ai/api/templates/", headers=headers)

# Print each template as "name: id"
templates = response.json()["templates"]
for template in templates:
    print("{}: {}".format(template["name"], template["id"]))
// Fetch the list of extraction templates
const response = await fetch('https://pdf2text.ai/api/templates/', { headers });

// Print each template as "name: id"
const { templates } = await response.json();
for (const { name, id } of templates) {
  console.log(`${name}: ${id}`);
}
Response
{
  "status": "success",
  "templates": [
    {
      "id": "tmpl_invoice_v2",
      "slug": "invoice",
      "name": "Invoice",
      "description": "Standard invoice extraction",
      "extraction_mode": "structured"
    },
    {
      "id": "tmpl_receipt_v1",
      "slug": "receipt",
      "name": "Receipt",
      "description": "Receipt and expense extraction",
      "extraction_mode": "structured"
    }
  ]
}

Need Help?

Use the Review Queue to validate extractions and maintain an audit trail.