Export Pipeline

Learn how to export research data, download reports, and build automated data pipelines.

Overview

The SDK provides several ways to export and download your research data:

- Export summaries with all related data
- Download study and report PDFs
- Get file download URLs
- Build automated export workflows

This guide covers best practices for exporting and integrating with your data pipeline.

Exporting Summaries

Export summaries with all related inputs and data.

Basic Summary Export

python

from syntheticusers import ApiClient, Configuration, SummariesApi

# Client configuration: API base URL plus the access token used to authenticate
configuration = Configuration(
    host="https://api.syntheticusers.com/api/v1",
    access_token="your-access-token"
)

with ApiClient(configuration) as api_client:
    summaries_api = SummariesApi(api_client)

    # Export a summary as text
    export_data = summaries_api.export_summary_v1(
        summary_id="your-summary-id"
    )

    # Save to file. The encoding is set explicitly so that non-ASCII
    # characters in the export are written the same way on every platform
    # instead of depending on the locale's default encoding.
    with open("summary_export.txt", "w", encoding="utf-8") as f:
        f.write(export_data)

    print("✓ Summary exported successfully")

Export All Summaries in a Study

python

from syntheticusers import SummariesApi

def export_all_study_summaries(api_client, study_id, output_dir="exports", page_size=50):
    """Export all summaries from a study to individual text files.

    Pages through the summaries of ``study_id`` and writes each export to
    ``{output_dir}/summary_{summary.id}.txt``. A failure on one summary is
    reported and skipped so the remaining summaries are still exported.

    Args:
        api_client: An open ``ApiClient`` used to build the ``SummariesApi``.
        study_id: ID of the study whose summaries are exported.
        output_dir: Directory that receives the files; created if missing.
        page_size: Number of summaries requested per page.

    Returns:
        The number of summaries that were exported successfully.
    """
    import os

    summaries_api = SummariesApi(api_client)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    page = 1
    exported_count = 0

    while True:
        response = summaries_api.list_summaries_v1(
            study_id=study_id,
            page=page,
            page_size=page_size
        )

        for summary in response.items:
            try:
                export_data = summaries_api.export_summary_v1(
                    summary_id=summary.id
                )

                # Explicit UTF-8 so non-ASCII text is written consistently
                # regardless of the platform's default encoding.
                filename = f"{output_dir}/summary_{summary.id}.txt"
                with open(filename, "w", encoding="utf-8") as f:
                    f.write(export_data)

                exported_count += 1
                print(f"✓ Exported summary {summary.id}")
            except Exception as e:
                # Best-effort: report the failure and continue with the rest
                print(f"✗ Failed to export {summary.id}: {e}")

        # A short page means there is nothing left to fetch. The check uses
        # the same page_size that was requested, so the two cannot drift apart.
        if len(response.items) < page_size:
            break
        page += 1

    print(f"\nExported {exported_count} summaries to {output_dir}/")
    return exported_count

# Usage: open a client and export every summary of one study
with ApiClient(configuration) as client:
    export_all_study_summaries(client, study_id="your-study-id", output_dir="study_exports")

Downloading PDF Reports

Download study and report PDFs for sharing and archiving.

Download Study PDF

python

from syntheticusers import StudiesApi

with ApiClient(configuration) as client:
    # Fetch the study's PDF as raw bytes
    study_pdf = StudiesApi(client).get_study_pdf_v1(study_id="your-study-id")

    # Binary mode, because the payload is bytes rather than text
    with open("study_report.pdf", "wb") as pdf_file:
        pdf_file.write(study_pdf)

    print("✓ Study PDF downloaded")

Download Report PDF

python

from syntheticusers import ReportsApi

with ApiClient(configuration) as client:
    # Fetch the report's PDF
    report_pdf = ReportsApi(client).get_report_pdf_v1(report_id="your-report-id")

    # Binary mode, matching how the study PDF is saved
    with open("report.pdf", "wb") as pdf_file:
        pdf_file.write(report_pdf)

    print("✓ Report PDF downloaded")

Batch Download All Study PDFs

python

from syntheticusers import StudiesApi
import os

def download_all_study_pdfs(api_client, project_id, output_dir="pdfs", page_size=50):
    """Download PDFs for all studies in a project.

    Pages through the studies of ``project_id`` and saves each PDF as
    ``{output_dir}/{description}_{study.id}.pdf``, where the description is
    truncated to 50 characters and stripped of characters that are unsafe in
    file names. A failure on one study is reported and skipped.

    Args:
        api_client: An open ``ApiClient`` used to build the ``StudiesApi``.
        project_id: ID of the project whose studies are downloaded.
        output_dir: Directory that receives the PDFs; created if missing.
        page_size: Number of studies requested per page.

    Returns:
        The number of PDFs that were downloaded successfully.
    """
    # Characters that are path separators or are rejected in file names on
    # common file systems; control characters are handled separately below.
    unsafe_chars = '\\/:*?"<>|'

    studies_api = StudiesApi(api_client)

    os.makedirs(output_dir, exist_ok=True)

    page = 1
    downloaded_count = 0

    while True:
        response = studies_api.list_studies_v1(
            project_id=project_id,
            page=page,
            page_size=page_size
        )

        for study in response.items:
            try:
                pdf_bytes = studies_api.get_study_pdf_v1(study_id=study.id)

                # A missing description must not cost us the PDF, so fall
                # back to a fixed label; the study ID keeps the name unique.
                description = study.description or "study"
                safe_name = "".join(
                    "_" if ch in unsafe_chars or ord(ch) < 32 else ch
                    for ch in description[:50]
                )
                filename = f"{output_dir}/{safe_name}_{study.id}.pdf"

                with open(filename, "wb") as f:
                    f.write(pdf_bytes)

                downloaded_count += 1
                print(f"✓ Downloaded {study.id}")
            except Exception as e:
                # Best-effort: report the failure and continue with the rest
                print(f"✗ Failed to download {study.id}: {e}")

        # A short page means there is nothing left to fetch
        if len(response.items) < page_size:
            break
        page += 1

    print(f"\nDownloaded {downloaded_count} PDFs to {output_dir}/")
    return downloaded_count

# Download every study PDF of one project into the default "pdfs" directory
with ApiClient(configuration) as client:
    download_all_study_pdfs(client, project_id="your-project-id")

Working with Files

Get presigned download URLs for uploaded files and save the files to disk.

Get File Download URL

python

from syntheticusers import FilesApi

with ApiClient(configuration) as client:
    # Ask the API for a presigned download link to the file
    download_info = FilesApi(client).get_file_download_url_v1(file_id="your-file-id")

    # The link comes with an expiry time
    print(f"Download URL: {download_info.download_url}")
    print(f"URL expires at: {download_info.expires_at}")

Download a File

python

import os

import requests
from syntheticusers import FilesApi

with ApiClient(configuration) as api_client:
    files_api = FilesApi(api_client)

    # Get file metadata and download URL
    file_with_url = files_api.get_file_download_url_v1(
        file_id="your-file-id"
    )

    # Download the file. requests has no default timeout, so without one a
    # stalled connection would block this script indefinitely.
    response = requests.get(file_with_url.download_url, timeout=60)

    if response.status_code == 200:
        # Keep only the final path component so a name containing directory
        # parts (e.g. "../x") cannot write outside the current directory.
        local_name = os.path.basename(file_with_url.name) or "downloaded_file"
        with open(local_name, "wb") as f:
            f.write(response.content)
        print(f"✓ Downloaded {file_with_url.name}")
    else:
        print(f"✗ Download failed: {response.status_code}")

Download All Project Files

python

import requests
from syntheticusers import FilesApi
import os

def download_all_project_files(api_client, project_id, output_dir="files",
                               page_size=50, timeout=60):
    """Download all files from a project.

    Pages through the files of ``project_id``, requests a download URL for
    each one and saves the content under ``output_dir``. A failure on one
    file is reported and skipped so the remaining files are still downloaded.

    Args:
        api_client: An open ``ApiClient`` used to build the ``FilesApi``.
        project_id: ID of the project whose files are downloaded.
        output_dir: Directory that receives the files; created if missing.
        page_size: Number of files requested per page.
        timeout: Seconds to wait on each HTTP download before giving up.

    Returns:
        The number of files that were downloaded successfully.
    """
    files_api = FilesApi(api_client)

    os.makedirs(output_dir, exist_ok=True)

    page = 1
    downloaded_count = 0

    while True:
        response = files_api.list_files_v1(
            project_id=project_id,
            page=page,
            page_size=page_size
        )

        for file in response.items:
            try:
                file_with_url = files_api.get_file_download_url_v1(
                    file_id=file.id
                )

                # requests has no default timeout; without one a stalled
                # connection would hang the whole export.
                download_response = requests.get(
                    file_with_url.download_url, timeout=timeout
                )

                if download_response.status_code == 200:
                    # Keep only the final path component so a name with
                    # directory parts (e.g. "../x") stays inside output_dir.
                    local_name = os.path.basename(file_with_url.name) or str(file.id)
                    filepath = f"{output_dir}/{local_name}"
                    with open(filepath, "wb") as f:
                        f.write(download_response.content)
                    downloaded_count += 1
                    print(f"✓ Downloaded {file_with_url.name}")
                else:
                    print(f"✗ Failed to download {file.id}")
            except Exception as e:
                # Best-effort: report the failure and continue with the rest
                print(f"✗ Error downloading {file.id}: {e}")

        # A short page means there is nothing left to fetch
        if len(response.items) < page_size:
            break
        page += 1

    print(f"\nDownloaded {downloaded_count} files to {output_dir}/")
    return downloaded_count

# Download every file of one project into the default "files" directory
with ApiClient(configuration) as client:
    download_all_project_files(client, project_id="your-project-id")

Building Export Pipelines

Create automated export workflows for regular data exports.

Complete Export Pipeline

python

import os
from datetime import datetime
from syntheticusers import (
    ApiClient,
    Configuration,
    StudiesApi,
    SummariesApi,
    ReportsApi,
    FilesApi
)

def export_project_data(project_id, access_token, base_dir="exports"):
    """
    Complete export pipeline for a project.

    Exports every study as a PDF and every summary as a text file, and
    downloads every project file, into a new timestamped directory under
    ``base_dir``. Reports are not exported by this function.

    Relies on ``download_all_project_files`` (see "Download All Project
    Files") being defined in the same module.

    Args:
        project_id: ID of the project to export.
        access_token: Access token used to authenticate the API client.
        base_dir: Parent directory for the timestamped export directory.

    Returns:
        The path of the export directory that was created.
    """
    # One page size for every listing call and its end-of-data check
    page_size = 50

    configuration = Configuration(
        host="https://api.syntheticusers.com/api/v1",
        access_token=access_token
    )

    # Create timestamped export directory so repeated runs never collide
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    export_dir = f"{base_dir}/project_{project_id}_{timestamp}"
    os.makedirs(export_dir, exist_ok=True)

    print(f"Starting export to {export_dir}/")

    with ApiClient(configuration) as api_client:
        studies_api = StudiesApi(api_client)
        summaries_api = SummariesApi(api_client)

        # 1. Export all studies as PDFs
        print("\n📄 Exporting studies...")
        studies_dir = f"{export_dir}/studies"
        os.makedirs(studies_dir, exist_ok=True)

        page = 1
        study_count = 0

        while True:
            response = studies_api.list_studies_v1(
                project_id=project_id,
                page=page,
                page_size=page_size
            )

            for study in response.items:
                try:
                    pdf_bytes = studies_api.get_study_pdf_v1(study_id=study.id)
                    filename = f"{studies_dir}/study_{study.id}.pdf"
                    with open(filename, "wb") as f:
                        f.write(pdf_bytes)
                    study_count += 1
                except Exception as e:
                    # Best-effort: report and continue with the next study
                    print(f"  ✗ Failed to export study {study.id}: {e}")

            # A short page means there is nothing left to fetch
            if len(response.items) < page_size:
                break
            page += 1

        print(f"  ✓ Exported {study_count} studies")

        # 2. Export all summaries
        print("\n📝 Exporting summaries...")
        summaries_dir = f"{export_dir}/summaries"
        os.makedirs(summaries_dir, exist_ok=True)

        page = 1
        summary_count = 0

        while True:
            response = summaries_api.list_summaries_v1(
                project_id=project_id,
                page=page,
                page_size=page_size
            )

            for summary in response.items:
                try:
                    export_data = summaries_api.export_summary_v1(
                        summary_id=summary.id
                    )
                    filename = f"{summaries_dir}/summary_{summary.id}.txt"
                    # Explicit UTF-8 so non-ASCII text is written the same
                    # way regardless of the platform's default encoding.
                    with open(filename, "w", encoding="utf-8") as f:
                        f.write(export_data)
                    summary_count += 1
                except Exception as e:
                    # Best-effort: report and continue with the next summary
                    print(f"  ✗ Failed to export summary {summary.id}: {e}")

            if len(response.items) < page_size:
                break
            page += 1

        print(f"  ✓ Exported {summary_count} summaries")

        # 3. Download all files
        print("\n📎 Downloading files...")
        files_count = download_all_project_files(
            api_client,
            project_id,
            output_dir=f"{export_dir}/files"
        )

    print("\n✅ Export complete!")
    print(f"   Location: {export_dir}/")
    print(f"   Studies: {study_count}")
    print(f"   Summaries: {summary_count}")
    print(f"   Files: {files_count}")

    return export_dir

# Usage: run the full pipeline; the return value is the export directory path
export_dir = export_project_data("your-project-id", "your-access-token")

Scheduled Export Job

python

import schedule
import time

def scheduled_export_job():
    """Run one project export and report, rather than raise, any failure.

    Intended to be registered with the scheduler: an exception raised by the
    export is caught and printed so it does not propagate to the caller.
    """
    # Imported here because this snippet's own imports do not include
    # datetime; without it the first print would raise NameError outside the
    # try block and stop the scheduler loop.
    from datetime import datetime

    print(f"Running scheduled export at {datetime.now()}")
    try:
        export_project_data(
            project_id="your-project-id",
            access_token="your-access-token",
            base_dir="/backups/exports"
        )
    except Exception as e:
        print(f"Export job failed: {e}")

# Register the export job to fire once a day at 02:00
daily_at_2am = schedule.every().day.at("02:00")
daily_at_2am.do(scheduled_export_job)

print("Export scheduler started. Press Ctrl+C to exit.")
while True:
    # Run whatever is due, then sleep a minute before checking again
    schedule.run_pending()
    time.sleep(60)

Exporting to Different Formats

Convert exported data to various formats for analysis.

Export to JSON

python

import json
from syntheticusers import InterviewsApi, SyntheticUsersApi

def export_interviews_to_json(api_client, study_id, output_file="interviews.json", page_size=50):
    """Export all interviews of a study as a JSON array.

    Pages through the interviews of ``study_id``, fetches the full record
    for each one and writes a list of dicts (``id``, ``synthetic_user_id``,
    ``status``, ``created_at``) to ``output_file``.

    Args:
        api_client: An open ``ApiClient`` used to build the ``InterviewsApi``.
        study_id: ID of the study whose interviews are exported.
        output_file: Path of the JSON file to write.
        page_size: Number of interviews requested per page.
    """
    interviews_api = InterviewsApi(api_client)

    interviews = []
    page = 1

    while True:
        response = interviews_api.list_interviews_v1(
            study_id=study_id,
            page=page,
            page_size=page_size
        )

        for interview in response.items:
            # Get full interview details
            full_interview = interviews_api.get_interview_v1(
                interview_id=interview.id
            )

            # A missing timestamp is exported as null instead of raising
            created_at = full_interview.created_at
            # NOTE(review): assumes id, synthetic_user_id and status are
            # JSON-serializable as-is; confirm against the SDK models.
            interviews.append({
                "id": full_interview.id,
                "synthetic_user_id": full_interview.synthetic_user_id,
                "status": full_interview.status,
                "created_at": created_at.isoformat() if created_at is not None else None,
                # Add other fields as needed
            })

        # A short page means there is nothing left to fetch
        if len(response.items) < page_size:
            break
        page += 1

    # Save as JSON
    with open(output_file, "w") as f:
        json.dump(interviews, f, indent=2)

    print(f"✓ Exported {len(interviews)} interviews to {output_file}")

# Write one study's interviews to the default "interviews.json"
with ApiClient(configuration) as client:
    export_interviews_to_json(client, study_id="your-study-id")

Export to CSV

python

import csv
from syntheticusers import ConversationsApi

def export_conversations_to_csv(api_client, study_id, output_file="conversations.csv", page_size=50):
    """Export the conversations of a study as CSV, one row per message.

    Pages through the conversations of ``study_id``, fetches the full record
    for each one and writes every message as a row with the columns
    ``conversation_id``, ``interview_id``, ``synthetic_user_id``,
    ``message``, ``role`` and ``timestamp``.

    Args:
        api_client: An open ``ApiClient`` used to build the ``ConversationsApi``.
        study_id: ID of the study whose conversations are exported.
        output_file: Path of the CSV file to write.
        page_size: Number of conversations requested per page.
    """
    conversations_api = ConversationsApi(api_client)

    page = 1
    conversation_count = 0

    # newline="" lets the csv module control line endings. The encoding is
    # explicit because message text may contain non-ASCII characters that the
    # platform's default encoding cannot represent.
    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)

        # Write header
        writer.writerow([
            "conversation_id",
            "interview_id",
            "synthetic_user_id",
            "message",
            "role",
            "timestamp"
        ])

        while True:
            response = conversations_api.list_conversations_v1(
                study_id=study_id,
                page=page,
                page_size=page_size
            )

            for conv in response.items:
                # Get full conversation details
                full_conv = conversations_api.get_conversation_v1(
                    conversation_id=conv.id
                )

                # Write each message as a row.
                # NOTE(review): messages are read with .get(), so they are
                # assumed to be dict-like; confirm against the SDK model.
                for message in full_conv.messages:
                    writer.writerow([
                        conv.id,
                        conv.interview_id,
                        conv.synthetic_user_id,
                        message.get("content", ""),
                        message.get("role", ""),
                        message.get("timestamp", "")
                    ])

                conversation_count += 1

            # A short page means there is nothing left to fetch
            if len(response.items) < page_size:
                break
            page += 1

    print(f"✓ Exported {conversation_count} conversations to {output_file}")

# Write one study's conversations to the default "conversations.csv"
with ApiClient(configuration) as client:
    export_conversations_to_csv(client, study_id="your-study-id")

Next Steps

- Batch Processing - Learn how to process data at scale
- API Reference - Browse all available export methods
- Quick Start - Review the basic workflow

Export Pipeline ​

Overview ​

Exporting Summaries ​

Basic Summary Export ​

Export All Summaries in a Study ​

Downloading PDF Reports ​

Download Study PDF ​

Download Report PDF ​

Batch Download All Study PDFs ​

Working with Files ​

Get File Download URL ​

Download a File ​

Download All Project Files ​

Building Export Pipelines ​

Complete Export Pipeline ​

Scheduled Export Job ​

Exporting to Different Formats ​

Export to JSON ​

Export to CSV ​

Next Steps ​

Export Pipeline

Overview

Exporting Summaries

Basic Summary Export

Export All Summaries in a Study

Downloading PDF Reports

Download Study PDF

Download Report PDF

Batch Download All Study PDFs

Working with Files

Get File Download URL

Download a File

Download All Project Files

Building Export Pipelines

Complete Export Pipeline

Scheduled Export Job

Exporting to Different Formats

Export to JSON

Export to CSV

Next Steps