RESTful API for intelligent document extraction. Upload PDFs, get structured JSON back—asynchronously.
https://pdf2text.ai/api/v1
Base: https://pdf2text.ai/api/v1
All API requests require a Bearer token in the Authorization header. Generate your API key from your account settings.
Get API Key
# Set your API key
export P2T_API_KEY="your_api_key_here"
# Include in all requests
curl -H "Authorization: Bearer $P2T_API_KEY" ...
import requests
import os
API_KEY = os.environ["P2T_API_KEY"]
headers = {"Authorization": f"Bearer {API_KEY}"}
// Read the API key from the environment.
// NOTE: if P2T_API_KEY is unset, process.env yields undefined and the header
// below becomes the literal string "Bearer undefined" — export the variable
// before running any of the examples.
const API_KEY = process.env.P2T_API_KEY;
// Shared request headers carrying the Bearer token.
const headers = {
'Authorization': `Bearer ${API_KEY}`
};
Upload one or more PDF files to create a document group. Each upload returns a group ID for tracking.
POST /api/v1/documents/upload/
file (binary) — PDF file (required).
grouping_id (uuid) — Optional. Add to existing group.
document_type (string) — Optional. Classification hint.
# Upload a PDF file
curl -X POST https://pdf2text.ai/api/v1/documents/upload/ \
-H "Authorization: Bearer $P2T_API_KEY" \
-F "file=@invoice.pdf"
# Add to existing group
curl -X POST https://pdf2text.ai/api/v1/documents/upload/ \
-H "Authorization: Bearer $P2T_API_KEY" \
-F "file=@invoice2.pdf" \
-F "grouping_id=a1b2c3d4-..."
import requests

url = "https://pdf2text.ai/api/v1/documents/upload/"

# Upload a single file. Opening the PDF in binary mode inside `with`
# guarantees the handle is closed once the request has been sent.
with open("invoice.pdf", "rb") as f:
    response = requests.post(
        url,
        headers=headers,
        files={"file": f},
    )

# Fail loudly on HTTP errors instead of hitting a KeyError on an error payload.
response.raise_for_status()
data = response.json()
group_id = data["group_id"]  # identifies the document group for later calls
import { readFile } from 'node:fs/promises';

// Build the multipart body with the runtime's built-in FormData and Blob so it
// works with the built-in fetch; no third-party form-data package is needed.
const form = new FormData();
form.append(
  'file',
  new Blob([await readFile('invoice.pdf')], { type: 'application/pdf' }),
  'invoice.pdf',
);

const response = await fetch(
  'https://pdf2text.ai/api/v1/documents/upload/',
  {
    method: 'POST',
    // Do not set Content-Type by hand: fetch adds the multipart boundary itself.
    headers,
    body: form,
  },
);
if (!response.ok) {
  throw new Error(`Upload failed: HTTP ${response.status}`);
}

// Expose the ID as groupId, the name the "start extraction run" example reads.
const { group_id: groupId } = await response.json();
{
"status": "success",
"group_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"documents": [
{
"id": 42,
"filename": "invoice.pdf",
"pages": 3
}
]
}
Kick off asynchronous processing for a document group. Enable auto-routing to automatically classify and extract each document.
POST /api/v1/groups/{group_id}/runs/
auto_route (boolean) — Auto-classify (default: true).
template_id (uuid) — Force a specific template.
callback_url (string) — Webhook URL.
callback_secret (string) — HMAC signing secret.
# Start extraction with auto-routing
curl -X POST https://pdf2text.ai/api/v1/groups/$GROUP_ID/runs/ \
-H "Authorization: Bearer $P2T_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"auto_route": true,
"callback_url": "https://your-app.com/webhook"
}'
# Start extraction run
url = f"https://pdf2text.ai/api/v1/groups/{group_id}/runs/"
response = requests.post(
url,
headers=headers,
json={
"auto_route": True,
"callback_url": "https://your-app.com/webhook"
}
)
run_data = response.json()
run_id = run_data["run_id"]
// Start extraction run.
// groupId is the group_id value returned by the upload call.
const response = await fetch(
  `https://pdf2text.ai/api/v1/groups/${groupId}/runs/`,
  {
    method: 'POST',
    headers: {
      ...headers,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      auto_route: true,
      callback_url: 'https://your-app.com/webhook',
    }),
  },
);
if (!response.ok) {
  throw new Error(`Failed to start run: HTTP ${response.status}`);
}

// Expose the ID as runId, the name passed to pollResults in the polling example.
const { run_id: runId } = await response.json();
{
"status": "queued",
"run_id": "run_abc123xyz",
"message": "Processing started"
}
Poll the run endpoint or use webhooks to retrieve extraction results. Export as JSON or Excel.
GET /api/runs/{run_id}/
GET /api/runs/{run_id}/export.xlsx
# Poll for results
curl https://pdf2text.ai/api/runs/$RUN_ID/ \
-H "Authorization: Bearer $P2T_API_KEY"
# Download as Excel
curl -o results.xlsx \
https://pdf2text.ai/api/runs/$RUN_ID/export.xlsx \
-H "Authorization: Bearer $P2T_API_KEY"
import time

POLL_INTERVAL_SECONDS = 2
MAX_WAIT_SECONDS = 600  # example budget; give up instead of polling forever

# Poll until the run reports "completed" or the deadline passes.
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    response = requests.get(
        f"https://pdf2text.ai/api/runs/{run_id}/",
        headers=headers,
    )
    # Surface HTTP errors (bad key, unknown run) rather than looping on them.
    response.raise_for_status()
    data = response.json()
    if data["status"] == "completed":
        results = data["results"]
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Run {run_id} still {data['status']!r} after {MAX_WAIT_SECONDS}s"
        )
    time.sleep(POLL_INTERVAL_SECONDS)
# Download Excel export
excel = requests.get(
    f"https://pdf2text.ai/api/runs/{run_id}/export.xlsx",
    headers=headers,
)
# Check the status first so an error response is never saved as results.xlsx.
excel.raise_for_status()
with open("results.xlsx", "wb") as f:
    f.write(excel.content)
// Poll for results
/**
 * Poll a run until its status is "completed" and return its results.
 *
 * @param {string} runId - Run ID returned when the extraction run was started.
 * @param {object} [options]
 * @param {number} [options.intervalMs=2000] - Delay between polls, in milliseconds.
 * @param {number} [options.maxAttempts=Infinity] - Maximum number of polls before giving up.
 * @returns {Promise<*>} The `results` field of the completed run.
 * @throws {Error} If a poll returns a non-2xx HTTP status or maxAttempts is exhausted.
 */
const pollResults = async (runId, { intervalMs = 2000, maxAttempts = Infinity } = {}) => {
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    const response = await fetch(
      `https://pdf2text.ai/api/runs/${runId}/`,
      { headers },
    );
    // Without this check an error response would be parsed and polled forever.
    if (!response.ok) {
      throw new Error(`Polling run ${runId} failed: HTTP ${response.status}`);
    }
    const data = await response.json();
    if (data.status === 'completed') {
      return data.results;
    }
    // Skip the pointless final sleep when no further attempt will be made.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, intervalMs));
    }
  }
  throw new Error(`Run ${runId} did not complete after ${maxAttempts} polls`);
};
const results = await pollResults(runId);
{
"status": "completed",
"run_id": "run_abc123xyz",
"results": [
{
"document_id": 42,
"template": "invoice",
"confidence": 0.97,
"extracted": {
"vendor": "Acme Corp",
"total": "$1,234.56",
"date": "2024-01-15"
}
}
]
}
List available extraction templates. Use template IDs to force specific extraction schemas.
GET /api/templates/
# List available templates
curl https://pdf2text.ai/api/templates/ \
-H "Authorization: Bearer $P2T_API_KEY"
# List available templates
response = requests.get(
    "https://pdf2text.ai/api/templates/",
    headers=headers,
)
response.raise_for_status()  # don't index into an error payload
templates = response.json()["templates"]
for t in templates:
    # Print each template's name alongside its ID.
    print(f"{t['name']}: {t['id']}")
// List available templates
const response = await fetch(
'https://pdf2text.ai/api/templates/',
{ headers }
);
const { templates } = await response.json();
templates.forEach(t => console.log(`${t.name}: ${t.id}`));
{
"status": "success",
"templates": [
{
"id": "tmpl_invoice_v2",
"slug": "invoice",
"name": "Invoice",
"description": "Standard invoice extraction",
"extraction_mode": "structured"
},
{
"id": "tmpl_receipt_v1",
"slug": "receipt",
"name": "Receipt",
"description": "Receipt and expense extraction",
"extraction_mode": "structured"
}
]
}
Use the Review Queue to validate extractions and maintain an audit trail.
# Set your API key
export P2T_API_KEY="your_api_key_here"
# Include in all requests
curl -H "Authorization: Bearer $P2T_API_KEY" ...
import requests
import os
API_KEY = os.environ["P2T_API_KEY"]
headers = {"Authorization": f"Bearer {API_KEY}"}
// Read the API key from the environment.
// NOTE: if P2T_API_KEY is unset, process.env yields undefined and the header
// below becomes the literal string "Bearer undefined" — export the variable
// before running any of the examples.
const API_KEY = process.env.P2T_API_KEY;
// Shared request headers carrying the Bearer token.
const headers = {
'Authorization': `Bearer ${API_KEY}`
};
# Upload a PDF file
curl -X POST https://pdf2text.ai/api/v1/documents/upload/ \
-H "Authorization: Bearer $P2T_API_KEY" \
-F "file=@invoice.pdf"
# Add to existing group
curl -X POST https://pdf2text.ai/api/v1/documents/upload/ \
-H "Authorization: Bearer $P2T_API_KEY" \
-F "file=@invoice2.pdf" \
-F "grouping_id=a1b2c3d4-..."
import requests

url = "https://pdf2text.ai/api/v1/documents/upload/"

# Upload a single file. Opening the PDF in binary mode inside `with`
# guarantees the handle is closed once the request has been sent.
with open("invoice.pdf", "rb") as f:
    response = requests.post(
        url,
        headers=headers,
        files={"file": f},
    )

# Fail loudly on HTTP errors instead of hitting a KeyError on an error payload.
response.raise_for_status()
data = response.json()
group_id = data["group_id"]  # identifies the document group for later calls
import { readFile } from 'node:fs/promises';

// Build the multipart body with the runtime's built-in FormData and Blob so it
// works with the built-in fetch; no third-party form-data package is needed.
const form = new FormData();
form.append(
  'file',
  new Blob([await readFile('invoice.pdf')], { type: 'application/pdf' }),
  'invoice.pdf',
);

const response = await fetch(
  'https://pdf2text.ai/api/v1/documents/upload/',
  {
    method: 'POST',
    // Do not set Content-Type by hand: fetch adds the multipart boundary itself.
    headers,
    body: form,
  },
);
if (!response.ok) {
  throw new Error(`Upload failed: HTTP ${response.status}`);
}

// Expose the ID as groupId, the name the "start extraction run" example reads.
const { group_id: groupId } = await response.json();
{
"status": "success",
"group_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"documents": [
{
"id": 42,
"filename": "invoice.pdf",
"pages": 3
}
]
}
# Start extraction with auto-routing
curl -X POST https://pdf2text.ai/api/v1/groups/$GROUP_ID/runs/ \
-H "Authorization: Bearer $P2T_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"auto_route": true,
"callback_url": "https://your-app.com/webhook"
}'
# Start extraction run
url = f"https://pdf2text.ai/api/v1/groups/{group_id}/runs/"
response = requests.post(
url,
headers=headers,
json={
"auto_route": True,
"callback_url": "https://your-app.com/webhook"
}
)
run_data = response.json()
run_id = run_data["run_id"]
// Start extraction run.
// groupId is the group_id value returned by the upload call.
const response = await fetch(
  `https://pdf2text.ai/api/v1/groups/${groupId}/runs/`,
  {
    method: 'POST',
    headers: {
      ...headers,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      auto_route: true,
      callback_url: 'https://your-app.com/webhook',
    }),
  },
);
if (!response.ok) {
  throw new Error(`Failed to start run: HTTP ${response.status}`);
}

// Expose the ID as runId, the name passed to pollResults in the polling example.
const { run_id: runId } = await response.json();
{
"status": "queued",
"run_id": "run_abc123xyz",
"message": "Processing started"
}
# Poll for results
curl https://pdf2text.ai/api/runs/$RUN_ID/ \
-H "Authorization: Bearer $P2T_API_KEY"
# Download as Excel
curl -o results.xlsx \
https://pdf2text.ai/api/runs/$RUN_ID/export.xlsx \
-H "Authorization: Bearer $P2T_API_KEY"
import time

POLL_INTERVAL_SECONDS = 2
MAX_WAIT_SECONDS = 600  # example budget; give up instead of polling forever

# Poll until the run reports "completed" or the deadline passes.
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    response = requests.get(
        f"https://pdf2text.ai/api/runs/{run_id}/",
        headers=headers,
    )
    # Surface HTTP errors (bad key, unknown run) rather than looping on them.
    response.raise_for_status()
    data = response.json()
    if data["status"] == "completed":
        results = data["results"]
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Run {run_id} still {data['status']!r} after {MAX_WAIT_SECONDS}s"
        )
    time.sleep(POLL_INTERVAL_SECONDS)
# Download Excel export
excel = requests.get(
    f"https://pdf2text.ai/api/runs/{run_id}/export.xlsx",
    headers=headers,
)
# Check the status first so an error response is never saved as results.xlsx.
excel.raise_for_status()
with open("results.xlsx", "wb") as f:
    f.write(excel.content)
// Poll for results
/**
 * Poll a run until its status is "completed" and return its results.
 *
 * @param {string} runId - Run ID returned when the extraction run was started.
 * @param {object} [options]
 * @param {number} [options.intervalMs=2000] - Delay between polls, in milliseconds.
 * @param {number} [options.maxAttempts=Infinity] - Maximum number of polls before giving up.
 * @returns {Promise<*>} The `results` field of the completed run.
 * @throws {Error} If a poll returns a non-2xx HTTP status or maxAttempts is exhausted.
 */
const pollResults = async (runId, { intervalMs = 2000, maxAttempts = Infinity } = {}) => {
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    const response = await fetch(
      `https://pdf2text.ai/api/runs/${runId}/`,
      { headers },
    );
    // Without this check an error response would be parsed and polled forever.
    if (!response.ok) {
      throw new Error(`Polling run ${runId} failed: HTTP ${response.status}`);
    }
    const data = await response.json();
    if (data.status === 'completed') {
      return data.results;
    }
    // Skip the pointless final sleep when no further attempt will be made.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, intervalMs));
    }
  }
  throw new Error(`Run ${runId} did not complete after ${maxAttempts} polls`);
};
const results = await pollResults(runId);
{
"status": "completed",
"run_id": "run_abc123xyz",
"results": [
{
"document_id": 42,
"template": "invoice",
"confidence": 0.97,
"extracted": {
"vendor": "Acme Corp",
"total": "$1,234.56",
"date": "2024-01-15"
}
}
]
}
# List available templates
curl https://pdf2text.ai/api/templates/ \
-H "Authorization: Bearer $P2T_API_KEY"
# List available templates
response = requests.get(
    "https://pdf2text.ai/api/templates/",
    headers=headers,
)
response.raise_for_status()  # don't index into an error payload
templates = response.json()["templates"]
for t in templates:
    # Print each template's name alongside its ID.
    print(f"{t['name']}: {t['id']}")
// List available templates
const response = await fetch(
'https://pdf2text.ai/api/templates/',
{ headers }
);
const { templates } = await response.json();
templates.forEach(t => console.log(`${t.name}: ${t.id}`));
{
"status": "success",
"templates": [
{
"id": "tmpl_invoice_v2",
"slug": "invoice",
"name": "Invoice",
"description": "Standard invoice extraction",
"extraction_mode": "structured"
},
{
"id": "tmpl_receipt_v1",
"slug": "receipt",
"name": "Receipt",
"description": "Receipt and expense extraction",
"extraction_mode": "structured"
}
]
}