DeepCitation + Python
DeepCitation doesn’t have a Python SDK yet, but the REST API works with any HTTP client. This guide shows the full workflow using httpx (or requests).
Install
pip install httpx python-dotenv
Full Workflow
import httpx
import os
from dotenv import load_dotenv
# Read a local .env file (if present) into the process environment.
load_dotenv()
# Raises KeyError when this line runs if DEEPCITATION_API_KEY is not set.
API_KEY = os.environ["DEEPCITATION_API_KEY"]
# Root URL that each endpoint path in this guide is appended to.
BASE_URL = "https://api.deepcitation.com"
# Bearer-token header passed to every request in this guide.
HEADERS = {"Authorization": f"Bearer {API_KEY}"}
# 1. Upload a document
def prepare_attachment(file_path: str) -> dict:
    """Upload a local file to the /prepareAttachments endpoint.

    Args:
        file_path: Path of the document to upload; it is opened in binary
            mode and sent as the multipart field ``file`` under its base name.

    Returns:
        The decoded JSON body of the response. The usage example in this
        guide reads ``attachmentId`` and ``deepTextPages`` from it.

    Raises:
        Whatever ``raise_for_status()`` raises for a 4xx/5xx response, and
        ``OSError`` if the file cannot be opened.
    """
    endpoint = f"{BASE_URL}/prepareAttachments"
    upload_name = os.path.basename(file_path)
    with open(file_path, "rb") as handle:
        reply = httpx.post(
            endpoint,
            headers=HEADERS,
            files={"file": (upload_name, handle)},
            timeout=60.0,
        )
    reply.raise_for_status()
    return reply.json()
# 2. Verify citations
def verify_citations(attachment_id: str, citations: dict) -> dict:
    """Send citations for one attachment to the /verifyCitations endpoint.

    Args:
        attachment_id: Sent as ``data.attachmentId``.
        citations: Sent unchanged as ``data.citations``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Whatever ``raise_for_status()`` raises for a 4xx/5xx response.
    """
    json_headers = dict(HEADERS)
    json_headers["Content-Type"] = "application/json"
    payload = {
        "data": {
            "attachmentId": attachment_id,
            "citations": citations,
            "outputImageFormat": "avif",
        }
    }
    reply = httpx.post(
        f"{BASE_URL}/verifyCitations",
        headers=json_headers,
        json=payload,
        timeout=30.0,
    )
    reply.raise_for_status()
    return reply.json()
# Usage
# Upload the document and keep the two fields the rest of the flow needs.
result = prepare_attachment("report.pdf")
attachment_id, deep_text_pages = result["attachmentId"], result["deepTextPages"]

# ... call your LLM with deep_text_pages injected into the prompt ...
# ... parse the <<<CITATION_DATA>>> block from the LLM response ...
# NOTE: parsed_citations is a placeholder for the result of that parsing step.
verifications = verify_citations(attachment_id, parsed_citations)

# Print one line per citation; a missing searchState/status shows as "unknown".
for citation_key, verification in verifications["verifications"].items():
    search_state = verification.get("searchState", {})
    status = search_state.get("status", "unknown")
    print(f"Citation {citation_key}: {status}")
Prepare a URL
def prepare_url(url: str) -> dict:
    """Ask the /prepareAttachments endpoint to prepare a document given by URL.

    Args:
        url: Sent as the ``url`` field of the JSON request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Whatever ``raise_for_status()`` raises for a 4xx/5xx response.
    """
    json_headers = dict(HEADERS)
    json_headers["Content-Type"] = "application/json"
    reply = httpx.post(
        f"{BASE_URL}/prepareAttachments",
        headers=json_headers,
        json={"url": url},
        timeout=60.0,
    )
    reply.raise_for_status()
    return reply.json()
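FastAPI Route Handlers
These handlers reuse BASE_URL and HEADERS from the Full Workflow section and additionally require pip install fastapi python-multipart (FastAPI needs python-multipart to parse uploaded files).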
from fastapi import FastAPI, UploadFile, File
from pydantic import BaseModel
import httpx
app = FastAPI()
@app.post("/upload")
async def upload_document(file: UploadFile = File(...)):
    # Relay the uploaded file to /prepareAttachments and hand the API's JSON
    # reply back to the caller. (Kept as comments: FastAPI would publish a
    # docstring as the route description.)
    endpoint = f"{BASE_URL}/prepareAttachments"
    async with httpx.AsyncClient() as client:
        # The whole upload is read into memory before being forwarded.
        content = await file.read()
        multipart = {"file": (file.filename, content, file.content_type)}
        upstream = await client.post(
            endpoint,
            headers=HEADERS,
            files=multipart,
            timeout=60.0,
        )
    # A 4xx/5xx reply from the API raises here.
    upstream.raise_for_status()
    return upstream.json()
# Request body accepted by the /verify route.
class VerifyRequest(BaseModel):
    # Forwarded to the API as "attachmentId".
    attachment_id: str
    # Forwarded to the API unchanged as "citations".
    citations: dict
@app.post("/verify")
async def verify(request: VerifyRequest):
    # Relay the validated request body to /verifyCitations and hand the API's
    # JSON reply back to the caller. (Kept as comments: FastAPI would publish
    # a docstring as the route description.)
    json_headers = dict(HEADERS)
    json_headers["Content-Type"] = "application/json"
    payload = {
        "data": {
            "attachmentId": request.attachment_id,
            "citations": request.citations,
        }
    }
    async with httpx.AsyncClient() as client:
        upstream = await client.post(
            f"{BASE_URL}/verifyCitations",
            headers=json_headers,
            json=payload,
            timeout=30.0,
        )
    # A 4xx/5xx reply from the API raises here.
    upstream.raise_for_status()
    return upstream.json()
Parsing the Citation Block
The LLM outputs its citations as JSON between <<<CITATION_DATA>>> and <<<END_CITATION_DATA>>> markers. Here’s a minimal parser:
import json
import re
def _first_present(item: dict, *keys: str, default=None):
    """Return the value of the first of *keys* that *item* maps to a non-None value."""
    for key in keys:
        value = item.get(key)
        if value is not None:
            return value
    return default


def parse_citations(llm_output: str) -> dict:
    """Extract citations from the <<<CITATION_DATA>>> block.

    The block is expected to hold a JSON object mapping each attachment id to
    a list of citation objects. Each citation may use the long keys
    (``id``, ``source_context``, ``source_match``, ``page_id``, ``line_ids``)
    or the shorthand keys (``n``, ``f``, ``k``, ``p``, ``l``).

    Args:
        llm_output: Full text returned by the LLM.

    Returns:
        A dict keyed ``"<attachment_id>_<citation id>"`` whose values hold
        ``sourceContext``, ``sourceMatch``, ``pageId`` and ``lineIds``.
        Empty when there is no citation block or the block has no content.

    Raises:
        json.JSONDecodeError: If the block content is not valid JSON.
    """
    match = re.search(
        r"<<<CITATION_DATA>>>\s*(.*?)\s*<<<END_CITATION_DATA>>>",
        llm_output,
        re.DOTALL,
    )
    if not match:
        return {}

    raw_block = match.group(1)
    if not raw_block:
        # Markers with nothing between them mean "no citations", not bad JSON.
        return {}

    data = json.loads(raw_block)
    citations = {}
    for attachment_id, items in data.items():
        for item in items:
            # LLMs may use shorthand keys (n, f, k, p, l) to save tokens.
            # Only None counts as "missing", so falsy-but-valid values such
            # as a citation id of 0 are kept instead of being discarded.
            cid = _first_present(item, "id", "n")
            citations[f"{attachment_id}_{cid}"] = {
                "sourceContext": _first_present(item, "source_context", "f", default=""),
                "sourceMatch": _first_present(item, "source_match", "k", default=""),
                "pageId": _first_present(item, "page_id", "p", default=""),
                "lineIds": _first_present(item, "line_ids", "l", default=[]),
            }
    return citations
def strip_citation_block(llm_output: str) -> str:
    """Return *llm_output* without its citation block, trimmed of outer whitespace.

    Every span from <<<CITATION_DATA>>> through the nearest
    <<<END_CITATION_DATA>>> is removed together with the whitespace
    immediately around it.
    """
    block_pattern = re.compile(
        r"\s*<<<CITATION_DATA>>>.*?<<<END_CITATION_DATA>>>\s*",
        re.DOTALL,
    )
    visible_text = block_pattern.sub("", llm_output)
    return visible_text.strip()
Error Handling
class DeepCitationError(Exception):
    """Error reported by the DeepCitation REST API.

    Subclasses ``Exception`` so existing ``except Exception`` handlers keep
    working, while letting callers catch API failures specifically and
    inspect ``status_code``.
    """

    def __init__(self, message: str, status_code: int) -> None:
        super().__init__(message)
        self.status_code = status_code


def handle_dc_error(response: "httpx.Response") -> None:
    """Raise a descriptive error for a failed DeepCitation response.

    Args:
        response: The HTTP response to inspect. Only ``status_code`` and
            ``raise_for_status()`` are used.

    Raises:
        DeepCitationError: For 401, 429, 413 and any 5xx status; the message
            names the likely cause and ``status_code`` carries the status.
        Whatever ``response.raise_for_status()`` raises for any other
        error status.
    """
    status = response.status_code
    if status == 401:
        raise DeepCitationError("Invalid API key — check DEEPCITATION_API_KEY", status)
    if status == 429:
        raise DeepCitationError("Billing limit exceeded — add a payment method", status)
    if status == 413:
        raise DeepCitationError("File too large — max 100 MB", status)
    if status >= 500:
        raise DeepCitationError(f"Server error ({status}) — safe to retry", status)
    # Anything else is left to the HTTP client's own status check.
    response.raise_for_status()
Response Shape
The REST API returns searchState.status (e.g., "found", "not_found"). If you later migrate to the TypeScript SDK, the status is normalized to verification.status. See Verification Statuses for all possible values.
Next Steps
- API Reference — Full REST endpoint documentation
- Curl Guide — Direct API usage examples
- Verification Statuses — Understanding verification results