Batch Processing
Submit and process multiple jobs efficiently using the MicroDC client.
Basic Batch Pattern
from microdc import Client, LLMComplete

client = Client(api_key="mDC_...")

questions = [
    "What is Python?",
    "Explain machine learning",
    "What is a neural network?",
]

# Submit all jobs up front so they can run concurrently.
job_ids = []
for question in questions:
    job = LLMComplete(model="llama3.3")
    job.set_prompt(question)
    # Attach the question so it can be recovered from the job details later.
    job.metadata = {"question": question}
    job_id = client.send_job(job)
    job_ids.append(job_id)

# Block until every submitted job has finished (or the timeout expires).
client.wait_for_all(timeout=300)

# Collect results. Check success BEFORE touching `result`: a failed job
# has no 'choices' payload and the lookup would raise.
for job_id in job_ids:
    details = client.get_job_details(job_id)
    if details.is_successful():
        print(f"Q: {details.metadata['question']}")
        print(f"A: {details.result['choices'][0]['message']['content']}\n")
        client.acknowledge_job(job_id)
Batch with Callbacks
Process results as they arrive:
from microdc import Client, LLMComplete

# Results accumulate here as the callback fires for each finished job.
completed = []

def on_complete(client: Client, job_id: str):
    """Invoked by the client each time a submitted job finishes."""
    details = client.get_job_details(job_id)
    if not details.is_successful():
        return
    completed.append(
        {
            "question": details.metadata["question"],
            "answer": details.result['choices'][0]['message']['content'],
        }
    )
    # Acknowledge so the job is removed from the server-side queue.
    client.acknowledge_job(job_id)
    print(f"Completed {len(completed)}/{total} jobs")

client = Client(api_key="mDC_...")
client.set_callback(on_complete)

questions = ["Q1?", "Q2?", "Q3?", "Q4?", "Q5?"]
total = len(questions)

for question in questions:
    job = LLMComplete(model="llama3.3")
    job.set_prompt(question)
    job.metadata = {"question": question}
    client.send_job(job)

client.wait_for_all(timeout=300)
print(f"All done: {len(completed)} results")
Batch Document Processing
from microdc import Client, DocumentCall

client = Client(api_key="mDC_...")

# Upload all files first; keep the returned tokens for job submission.
documents = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]
file_tokens = []
for doc_path in documents:
    upload_result = client.upload_file(doc_path)
    file_tokens.append(upload_result['id'])

# Submit one processing job per uploaded file.
job_ids = []
for i, token in enumerate(file_tokens):
    job = DocumentCall(model="docling")
    job.add_file(token)
    # Remember which source file this job belongs to.
    job.metadata = {"filename": documents[i]}
    job_ids.append(client.send_job(job))

# Wait for all jobs, then report per-file status.
client.wait_for_all(timeout=600)
for job_id in job_ids:
    details = client.get_job_details(job_id)
    filename = details.metadata['filename']
    status = "OK" if details.is_successful() else "FAILED"
    # Print the actual filename (previously a dead variable).
    print(f"  [{status}] {filename}")
    if details.is_successful():
        client.acknowledge_job(job_id)
Batch Embeddings
from microdc import Client, LLMEmbed

client = Client(api_key="mDC_...")

texts = [f"Document {i} content..." for i in range(100)]

# Split the corpus into fixed-size batches, one embedding job per batch.
chunk_size = 25
job_ids = []
for start in range(0, len(texts), chunk_size):
    batch = texts[start:start + chunk_size]
    job = LLMEmbed(model="text-embedding-ada-002")
    job.add_texts(batch)
    # Record where this batch begins so results can be re-ordered if needed.
    job.metadata = {"start_index": start}
    job_ids.append(client.send_job(job))

client.wait_for_all()

# Gather the embedding vectors from every successful job.
all_embeddings = []
for job_id in job_ids:
    details = client.get_job_details(job_id)
    if details.is_successful():
        all_embeddings.extend(details.result['embeddings'])

print(f"Generated {len(all_embeddings)} embeddings")
Error Handling in Batches
from microdc import Client, LLMComplete

client = Client(api_key="mDC_...")

# Example prompts (the original snippet used `prompts` without defining
# it, which raises NameError when run as-is).
prompts = [
    "Summarize the theory of relativity",
    "Explain photosynthesis",
    "Describe the water cycle",
]

job_ids = []
for prompt in prompts:
    job = LLMComplete(model="llama3.3")
    job.set_prompt(prompt)
    job_ids.append(client.send_job(job))

client.wait_for_all(timeout=300)

# Partition jobs into successes and failures; only successes are
# acknowledged so failed jobs remain available for inspection/retry.
succeeded = []
failed = []
for job_id in job_ids:
    details = client.get_job_details(job_id)
    if details.is_successful():
        succeeded.append(details)
        client.acknowledge_job(job_id)
    else:
        failed.append(details)

print(f"Succeeded: {len(succeeded)}, Failed: {len(failed)}")
for f in failed:
    print(f"  Failed job {f.job_id}: {f.error_message}")
Notebook example
See the Batch Processing notebook for an interactive walkthrough with progress tracking and performance metrics.