Appearance
Batch Processing
Learn how to efficiently process large volumes of interviews, studies, and synthetic users at scale.
Overview
When working with production workloads, you'll often need to:
- Process multiple interviews in parallel
- Iterate through paginated lists of results
- Filter and batch process specific subsets of data
- Handle rate limiting and retries gracefully
This guide covers best practices for batch processing operations.
Iterating Through Paginated Results
Most list endpoints return paginated results. Here's how to iterate through all items:
List All Interviews
typescript
import { Configuration, InterviewsApi } from '@syntheticusers/sdk'
const configuration = new Configuration({
basePath: 'https://api.syntheticusers.com/api/v1',
accessToken: 'your-access-token'
})
const interviewsApi = new InterviewsApi(configuration)
/**
 * Collects every interview in a project by walking the paginated
 * list endpoint one page at a time.
 */
async function getAllInterviews(projectId: string) {
  const pageSize = 50
  const collected = []
  let currentPage = 1
  let lastBatchSize = pageSize

  // A page shorter than pageSize signals there are no more results.
  while (lastBatchSize >= pageSize) {
    const response = await interviewsApi.listInterviewsV1({
      projectId,
      page: currentPage,
      pageSize
    })
    collected.push(...response.items)
    lastBatchSize = response.items.length
    currentPage++
  }

  console.log(`Total interviews: ${collected.length}`)
  return collected
}
// Usage
const interviews = await getAllInterviews('your-project-id')

Generic Pagination Helper
Create a reusable helper to paginate any endpoint:
typescript
interface PaginatedResponse<T> {
  items: T[]
  total?: number
  page?: number
}

/**
 * Fetches every item from a paginated list endpoint.
 *
 * @param apiMethod  List call to invoke; receives `{ ...baseParams, page, pageSize }`.
 * @param baseParams Extra query parameters forwarded to every page request.
 * @param pageSize   Items requested per page; a short (or empty) page ends the walk.
 * @returns All items from every page, in the order the API returned them.
 */
async function paginateAll<T>(
  apiMethod: (params: any) => Promise<PaginatedResponse<T>>,
  baseParams: Record<string, any> = {},
  pageSize: number = 50
): Promise<T[]> {
  const allItems: T[] = []
  let page = 1

  while (true) {
    const response = await apiMethod({
      ...baseParams,
      page,
      pageSize
    })

    // Defensive: treat a missing items array as an empty page rather than crashing.
    const batch = response.items ?? []
    allItems.push(...batch)

    // A page shorter than pageSize means we've consumed the final page.
    if (batch.length < pageSize) {
      break
    }
    page++
  }

  return allItems
}
// Usage
const allInterviews = await paginateAll(
(params) => interviewsApi.listInterviewsV1(params),
{ projectId: 'your-project-id', status: 'completed' }
)

Filtering and Batch Processing
Use filters to process specific subsets of data efficiently.
Process Interviews by Status
typescript
import { Configuration, InterviewsApi, SummariesApi } from '@syntheticusers/sdk'
const configuration = new Configuration({
basePath: 'https://api.syntheticusers.com/api/v1',
accessToken: 'your-access-token'
})
const interviewsApi = new InterviewsApi(configuration)
const summariesApi = new SummariesApi(configuration)
/**
 * Generates a summary for every completed interview in a project.
 * Failures are logged per interview so one bad item doesn't abort the batch.
 */
async function processCompletedInterviews(projectId: string) {
  // Pull every completed interview across all pages.
  const completedInterviews = await paginateAll(
    (params) => interviewsApi.listInterviewsV1(params),
    { projectId, status: 'completed' }
  )
  console.log(`Processing ${completedInterviews.length} completed interviews...`)

  for (const interview of completedInterviews) {
    try {
      // The created summary object isn't used here; we only care that
      // creation succeeds (the original bound it to an unused local).
      await summariesApi.createSummaryV1({
        projectId,
        summaryCreate: {
          studyId: interview.studyId,
          interviewIds: [interview.id]
        }
      })
      console.log(`✓ Generated summary for interview ${interview.id}`)
    } catch (error) {
      console.error(`✗ Failed to process interview ${interview.id}:`, error)
    }
  }
}
await processCompletedInterviews('your-project-id')

Batch by Audience
typescript
import { SyntheticUsersApi } from '@syntheticusers/sdk'
const syntheticUsersApi = new SyntheticUsersApi(configuration)
/**
 * Loads all synthetic users in an audience and buckets them by persona type.
 * Users without a persona type land in the 'default' bucket.
 */
async function groupByPersona(projectId: string, audienceId: string) {
  const users = await paginateAll(
    (params) => syntheticUsersApi.listSyntheticUsersV1(params),
    { projectId, audienceId }
  )
  console.log(`Found ${users.length} synthetic users in audience`)

  // Build the persona buckets with a plain loop instead of reduce.
  const byPersona: Record<string, typeof users> = {}
  for (const user of users) {
    const bucket = user.persona?.type || 'default'
    const existing = byPersona[bucket]
    if (existing) {
      existing.push(user)
    } else {
      byPersona[bucket] = [user]
    }
  }

  for (const [personaType, usersList] of Object.entries(byPersona)) {
    console.log(`${personaType}: ${usersList.length} users`)
  }
  return byPersona
}
await groupByPersona('your-project-id', 'your-audience-id')

Parallel Processing
Use Promise.all() or Promise.allSettled() for concurrent operations:
Process Interviews in Parallel
typescript
/**
 * Fetches a single interview and reports success/failure as a result
 * object instead of throwing, so batch callers can aggregate outcomes.
 *
 * @param interviewId Id of the interview to fetch/process.
 * @returns `{ success: true }` on success, otherwise `{ success: false, error }`.
 */
async function processInterview(interviewId: string): Promise<{ success: boolean, interviewId: string, error?: string }> {
  try {
    const interview = await interviewsApi.getInterviewV1({ interviewId })
    // Do something with the interview
    void interview // placeholder use; the original left this binding unused
    return { success: true, interviewId }
  } catch (error) {
    // Normalize unknown throwables to a plain message string for callers.
    return {
      success: false,
      interviewId,
      error: error instanceof Error ? error.message : String(error)
    }
  }
}
/**
 * Processes every interview in a project, running up to `concurrency`
 * requests at a time by working through fixed-size batches.
 */
async function processAllInterviewsParallel(projectId: string, concurrency: number = 5) {
  const interviews = await paginateAll(
    (params) => interviewsApi.listInterviewsV1(params),
    { projectId }
  )
  const interviewIds = interviews.map(i => i.id)

  // Walk the id list in slices of `concurrency`; each slice runs in parallel.
  for (let offset = 0; offset < interviewIds.length; offset += concurrency) {
    const batch = interviewIds.slice(offset, offset + concurrency)
    const settled = await Promise.allSettled(
      batch.map(id => processInterview(id))
    )

    for (const [index, outcome] of settled.entries()) {
      if (outcome.status === 'fulfilled') {
        if (outcome.value.success) {
          console.log(`✓ Processed ${outcome.value.interviewId}`)
        } else {
          console.error(`✗ Failed ${outcome.value.interviewId}: ${outcome.value.error}`)
        }
      } else {
        console.error(`✗ Error processing ${batch[index]}: ${outcome.reason}`)
      }
    }
  }
}
await processAllInterviewsParallel('your-project-id')

Using p-limit for Better Concurrency Control
typescript
import pLimit from 'p-limit' // npm install p-limit
/**
 * Processes all interviews with p-limit capping in-flight requests at
 * `maxConcurrency`, then reports a success/failure tally.
 */
async function processWithLimit(projectId: string, maxConcurrency: number = 5) {
  const limit = pLimit(maxConcurrency)
  const interviews = await paginateAll(
    (params) => interviewsApi.listInterviewsV1(params),
    { projectId }
  )

  // Every promise is created up front, but the limiter ensures at most
  // `maxConcurrency` of them execute concurrently.
  const results = await Promise.all(
    interviews.map(interview => limit(() => processInterview(interview.id)))
  )

  const successful = results.filter(r => r.success).length
  const failed = results.filter(r => !r.success).length
  console.log(`Completed: ${successful} successful, ${failed} failed`)
}
await processWithLimit('your-project-id')

Rate Limiting Best Practices
Handle rate limits gracefully with exponential backoff:
typescript
/**
 * Invokes an API call, retrying with exponential backoff (1s, 2s, 4s, ...)
 * when the server responds 429 Too Many Requests.
 *
 * @param apiMethod  Zero-argument thunk performing the API call.
 * @param maxRetries Total attempts before giving up (default 3).
 * @returns The resolved value of the first successful attempt.
 * @throws Rethrows immediately on non-429 errors, or the last 429 error
 *         once all retries are exhausted.
 */
async function apiCallWithRetry<T>(
  apiMethod: () => Promise<T>,
  maxRetries: number = 3
): Promise<T> {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await apiMethod()
    } catch (error: unknown) {
      // Narrow the unknown throwable to the axios-style shape we probe,
      // instead of disabling checking with `any`. Non-object errors simply
      // yield `undefined` here and are rethrown below.
      const status = (error as { response?: { status?: number } }).response?.status

      // Only retry on HTTP 429, and only while attempts remain.
      if (status === 429 && attempt < maxRetries - 1) {
        const waitTime = Math.pow(2, attempt) * 1000 // 1s, 2s, 4s
        console.log(`Rate limited. Waiting ${waitTime}ms before retry...`)
        await new Promise(resolve => setTimeout(resolve, waitTime))
      } else {
        throw error
      }
    }
  }
  // Unreachable in practice (the loop either returns or rethrows),
  // but keeps the function total under noImplicitReturns.
  throw new Error('Max retries exceeded')
}
// Usage
const interview = await apiCallWithRetry(() =>
interviewsApi.getInterviewV1({ interviewId: 'some-interview-id' })
)

Bulk Operations
Use bulk endpoints when available for better performance:
Extend an Audience
typescript
import { AudiencesApi } from '@syntheticusers/sdk'
const audiencesApi = new AudiencesApi(configuration)
/**
 * Grows an existing audience by `additionalUsers` synthetic users via the
 * bulk extend endpoint.
 */
async function extendAudience(audienceId: string, additionalUsers: number) {
  const result = await audiencesApi.extendAudienceV1({
    audienceId,
    audienceExtend: { size: additionalUsers }
  })
  const addedCount = result.newUsers?.length || 0
  console.log(`Added ${addedCount} users to audience`)
  return result
}
await extendAudience('your-audience-id', 50)

Regenerate All Interviews
typescript
import { StudiesApi } from '@syntheticusers/sdk'
const studiesApi = new StudiesApi(configuration)
/**
 * Kicks off regeneration of every interview in a study and logs the count.
 */
async function regenerateStudyInterviews(studyId: string) {
  const regeneration = await studiesApi.regenerateAllInterviewsV1({ studyId })
  console.log(`Regenerating ${regeneration.totalInterviews} interviews`)
  return regeneration
}
await regenerateStudyInterviews('your-study-id')

Progress Tracking
Track progress for long-running batch operations:
typescript
import cliProgress from 'cli-progress' // npm install cli-progress
/**
 * Processes each interview sequentially while rendering a CLI progress bar.
 */
async function processWithProgress(projectId: string) {
  const interviews = await paginateAll(
    (params) => interviewsApi.listInterviewsV1(params),
    { projectId }
  )

  const progressBar = new cliProgress.SingleBar({
    format: 'Processing |{bar}| {percentage}% | {value}/{total} interviews',
    barCompleteChar: '\u2588',
    barIncompleteChar: '\u2591'
  })
  progressBar.start(interviews.length, 0)

  const results = []
  let completedCount = 0
  for (const interview of interviews) {
    try {
      results.push(await processInterview(interview.id))
    } catch (error) {
      // Keep the bar moving even if a single item blows up.
      console.error(`\nError processing ${interview.id}:`, error)
    }
    completedCount++
    progressBar.update(completedCount)
  }

  progressBar.stop()
  console.log(`\nCompleted: ${results.length}/${interviews.length}`)
}
await processWithProgress('your-project-id')

Error Handling Strategies
Implement robust error handling for batch operations:
typescript
/** Outcome of a batch run: per-item successes/failures plus wall-clock timing. */
interface BatchResult<T> {
  successful: Array<{ item: T; result: any }>
  failed: Array<{ item: T; error: string }>
  startTime: Date
  endTime?: Date
  duration?: number
}

/**
 * Runs `processFunc` over `items` sequentially, capturing every outcome
 * instead of aborting on the first failure.
 *
 * @param items       Items to process; logged by their `id` property when present.
 * @param processFunc Async worker invoked once per item.
 * @returns Successes, failures (with normalized error messages), and timing.
 */
async function batchProcessWithErrorHandling<T>(
  items: T[],
  processFunc: (item: T) => Promise<any>
): Promise<BatchResult<T>> {
  const results: BatchResult<T> = {
    successful: [],
    failed: [],
    startTime: new Date()
  }

  for (const item of items) {
    // Read the optional id through a narrow structural type rather than
    // `as any`; items without an id log `undefined`, same as before.
    const itemId = (item as { id?: unknown }).id
    try {
      const result = await processFunc(item)
      results.successful.push({ item, result })
      console.log(`✓ Processed ${itemId}`)
    } catch (error) {
      results.failed.push({
        item,
        error: error instanceof Error ? error.message : String(error)
      })
      console.error(`✗ Failed ${itemId}:`, error)
    }
  }

  results.endTime = new Date()
  results.duration = (results.endTime.getTime() - results.startTime.getTime()) / 1000
  console.log(`
Batch Processing Summary:
- Total: ${items.length}
- Successful: ${results.successful.length}
- Failed: ${results.failed.length}
- Duration: ${results.duration.toFixed(2)}s
`)
  return results
}
// Usage
/**
 * Placeholder worker for batchProcessWithErrorHandling — replace the body
 * with your real per-interview logic. Takes `unknown` (rather than `any`)
 * so the real implementation is forced to narrow before use.
 */
async function processSingleInterview(interview: unknown) {
  // Your processing logic
  void interview
  return { processed: true }
}
const interviews = await paginateAll(
(params) => interviewsApi.listInterviewsV1(params),
{ projectId: 'your-project-id' }
)
const results = await batchProcessWithErrorHandling(
interviews,
processSingleInterview
)
// Save failed items for retry
if (results.failed.length > 0) {
const fs = await import('fs')
const failedList = results.failed.map(
f => `${(f.item as any).id}: ${f.error}`
).join('\n')
fs.writeFileSync('failed_interviews.txt', failedList)
}

Next Steps
- Export Pipeline - Learn how to export and download results
- API Reference - Browse all available API methods
- Quick Start - Review the basic workflow