Skip to content

Export Pipeline

Learn how to export research data, download reports, and build automated data pipelines.

Overview

The SDK provides several ways to export and download your research data:

  • Export summaries with all related data
  • Download study and report PDFs
  • Get file download URLs
  • Build automated export workflows

This guide covers best practices for exporting your data and integrating those exports into your data pipeline.

Exporting Summaries

Export summaries with all related inputs and data.

Basic Summary Export

python
from syntheticusers import ApiClient, Configuration, SummariesApi

# Client settings: the API base URL and the access token sent with requests.
configuration = Configuration(
    host="https://api.syntheticusers.com/api/v1",
    access_token="your-access-token"
)

with ApiClient(configuration) as api_client:
    summaries_api = SummariesApi(api_client)

    # Export a summary as text
    export_data = summaries_api.export_summary_v1(
        summary_id="your-summary-id"
    )

    # Save to file. The encoding is pinned to UTF-8 so the write does not
    # depend on the platform default, which can reject non-ASCII text.
    with open("summary_export.txt", "w", encoding="utf-8") as f:
        f.write(export_data)

    print("✓ Summary exported successfully")

Export All Summaries in a Study

python
from syntheticusers import SummariesApi

def export_all_study_summaries(api_client, study_id, output_dir="exports", page_size=50):
    """Export every summary of a study to ``summary_<id>.txt`` files.

    Pages through ``list_summaries_v1`` and writes the text returned by
    ``export_summary_v1`` for each summary into ``output_dir``. A summary
    that fails to export is reported on stdout and skipped, so one failure
    does not stop the rest of the run.

    Args:
        api_client: Open ``ApiClient`` used to build the ``SummariesApi``.
        study_id: ID of the study whose summaries are exported.
        output_dir: Directory that receives the files; created if missing.
        page_size: Number of summaries requested per page.

    Returns:
        The number of summaries written successfully.
    """
    import os

    summaries_api = SummariesApi(api_client)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    page = 1
    exported_count = 0

    while True:
        response = summaries_api.list_summaries_v1(
            study_id=study_id,
            page=page,
            page_size=page_size
        )

        for summary in response.items:
            try:
                export_data = summaries_api.export_summary_v1(
                    summary_id=summary.id
                )

                # UTF-8 is set explicitly so the write does not depend on the
                # platform default encoding, which can reject non-ASCII text.
                filename = os.path.join(output_dir, f"summary_{summary.id}.txt")
                with open(filename, "w", encoding="utf-8") as f:
                    f.write(export_data)

                exported_count += 1
                print(f"✓ Exported summary {summary.id}")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"✗ Failed to export {summary.id}: {e}")

        # A page shorter than the requested size is treated as the last one.
        if len(response.items) < page_size:
            break
        page += 1

    print(f"\nExported {exported_count} summaries to {output_dir}/")
    return exported_count

# Usage: open a client and export the study's summaries into study_exports/
with ApiClient(configuration) as api_client:
    export_all_study_summaries(api_client, study_id="your-study-id", output_dir="study_exports")

Downloading PDF Reports

Download study and report PDFs for sharing and archiving.

Download Study PDF

python
from syntheticusers import StudiesApi

with ApiClient(configuration) as api_client:
    studies_api = StudiesApi(api_client)

    # The study PDF comes back as bytes
    pdf_bytes = studies_api.get_study_pdf_v1(study_id="your-study-id")

    # Written in binary mode because the payload is bytes, not text
    with open("study_report.pdf", mode="wb") as pdf_file:
        pdf_file.write(pdf_bytes)

    print("✓ Study PDF downloaded")

Download Report PDF

python
from syntheticusers import ReportsApi

with ApiClient(configuration) as api_client:
    reports_api = ReportsApi(api_client)

    # Fetch the PDF for a single report
    pdf_bytes = reports_api.get_report_pdf_v1(report_id="your-report-id")

    # Written in binary mode, matching the "wb" used for study PDFs
    with open("report.pdf", mode="wb") as pdf_file:
        pdf_file.write(pdf_bytes)

    print("✓ Report PDF downloaded")

Batch Download All Study PDFs

python
from syntheticusers import StudiesApi
import os

def _safe_filename_part(text, max_length=50):
    """Return ``text`` truncated and with file-name-unsafe characters replaced."""
    # A missing or empty description falls back to a fixed placeholder.
    truncated = (text or "untitled")[:max_length]
    # Keep letters, digits and a few harmless punctuation marks; everything
    # else (path separators, colons, quotes, control characters, ...) becomes "_".
    return "".join(
        ch if ch.isalnum() or ch in " -_." else "_"
        for ch in truncated
    )


def download_all_study_pdfs(api_client, project_id, output_dir="pdfs", page_size=50):
    """Download the PDF of every study in a project.

    Pages through ``list_studies_v1`` and saves each study's PDF as
    ``<description>_<study id>.pdf`` inside ``output_dir``. The description
    is truncated to 50 characters and sanitised for use in a file name.
    A study whose download fails is reported on stdout and skipped.

    Args:
        api_client: Open ``ApiClient`` used to build the ``StudiesApi``.
        project_id: ID of the project whose studies are downloaded.
        output_dir: Directory that receives the PDFs; created if missing.
        page_size: Number of studies requested per page.

    Returns:
        The number of PDFs written successfully.
    """
    studies_api = StudiesApi(api_client)

    os.makedirs(output_dir, exist_ok=True)

    page = 1
    downloaded_count = 0

    while True:
        response = studies_api.list_studies_v1(
            project_id=project_id,
            page=page,
            page_size=page_size
        )

        for study in response.items:
            try:
                # Download PDF
                pdf_bytes = studies_api.get_study_pdf_v1(study_id=study.id)

                # The study id keeps names unique even when two studies
                # share the same (truncated) description.
                safe_name = _safe_filename_part(study.description)
                filename = os.path.join(output_dir, f"{safe_name}_{study.id}.pdf")

                with open(filename, "wb") as f:
                    f.write(pdf_bytes)

                downloaded_count += 1
                print(f"✓ Downloaded {study.id}")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"✗ Failed to download {study.id}: {e}")

        # A page shorter than the requested size is treated as the last one.
        if len(response.items) < page_size:
            break
        page += 1

    print(f"\nDownloaded {downloaded_count} PDFs to {output_dir}/")
    return downloaded_count

# Usage: PDFs land in the default "pdfs" directory
with ApiClient(configuration) as api_client:
    download_all_study_pdfs(api_client, project_id="your-project-id")

Working with Files

Get download URLs for uploaded files and save the files locally.

Get File Download URL

python
from syntheticusers import FilesApi

with ApiClient(configuration) as api_client:
    files_api = FilesApi(api_client)

    # Ask the API for a presigned download URL for one file
    file_with_url = files_api.get_file_download_url_v1(file_id="your-file-id")

    # Show the URL together with its expiry
    print(f"Download URL: {file_with_url.download_url}")
    print(f"URL expires at: {file_with_url.expires_at}")

Download a File

python
import requests
from syntheticusers import FilesApi

import os

with ApiClient(configuration) as api_client:
    files_api = FilesApi(api_client)

    # Get file metadata and download URL
    file_with_url = files_api.get_file_download_url_v1(
        file_id="your-file-id"
    )

    # Download the file. Without a timeout, requests waits indefinitely on
    # an unresponsive server.
    response = requests.get(file_with_url.download_url, timeout=30)

    if response.status_code == 200:
        # Keep only the final path component of the reported name so a name
        # containing directory parts cannot write outside the working directory.
        local_name = os.path.basename(file_with_url.name) or "downloaded_file"
        with open(local_name, "wb") as f:
            f.write(response.content)
        print(f"✓ Downloaded {file_with_url.name}")
    else:
        print(f"✗ Download failed: {response.status_code}")

Download All Project Files

python
import requests
from syntheticusers import FilesApi
import os

def download_all_project_files(api_client, project_id, output_dir="files",
                               page_size=50, timeout=30):
    """Download every file of a project into ``output_dir``.

    Pages through ``list_files_v1``, requests a download URL for each file
    and saves the response body under the file's reported name. A file that
    fails to download is reported on stdout and skipped.

    Args:
        api_client: Open ``ApiClient`` used to build the ``FilesApi``.
        project_id: ID of the project whose files are downloaded.
        output_dir: Directory that receives the files; created if missing.
        page_size: Number of files requested per page.
        timeout: Seconds to wait on each HTTP download before giving up.

    Returns:
        The number of files written successfully.
    """
    files_api = FilesApi(api_client)

    os.makedirs(output_dir, exist_ok=True)

    page = 1
    downloaded_count = 0

    while True:
        response = files_api.list_files_v1(
            project_id=project_id,
            page=page,
            page_size=page_size
        )

        for file in response.items:
            try:
                # Get download URL
                file_with_url = files_api.get_file_download_url_v1(
                    file_id=file.id
                )

                # Without a timeout, requests waits indefinitely on an
                # unresponsive server and would stall the whole batch.
                download_response = requests.get(
                    file_with_url.download_url,
                    timeout=timeout
                )

                if download_response.status_code == 200:
                    # Keep only the final path component so a reported name
                    # with directory parts cannot escape output_dir.
                    # NOTE(review): two files with the same name overwrite
                    # each other here — confirm whether names are unique.
                    local_name = os.path.basename(file_with_url.name) or str(file.id)
                    filepath = os.path.join(output_dir, local_name)
                    with open(filepath, "wb") as f:
                        f.write(download_response.content)
                    downloaded_count += 1
                    print(f"✓ Downloaded {file_with_url.name}")
                else:
                    print(f"✗ Failed to download {file.id}")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"✗ Error downloading {file.id}: {e}")

        # A page shorter than the requested size is treated as the last one.
        if len(response.items) < page_size:
            break
        page += 1

    print(f"\nDownloaded {downloaded_count} files to {output_dir}/")
    return downloaded_count

# Usage: files land in the default "files" directory
with ApiClient(configuration) as api_client:
    download_all_project_files(api_client, project_id="your-project-id")

Building Export Pipelines

Create automated export workflows for regular data exports.

Complete Export Pipeline

python
import os
from datetime import datetime
from syntheticusers import (
    ApiClient,
    Configuration,
    StudiesApi,
    SummariesApi,
    ReportsApi,
    FilesApi
)

def _export_study_pdfs(studies_api, project_id, studies_dir, page_size):
    """Save a PDF for every study of the project; return how many were saved."""
    page = 1
    study_count = 0

    while True:
        response = studies_api.list_studies_v1(
            project_id=project_id,
            page=page,
            page_size=page_size
        )

        for study in response.items:
            try:
                pdf_bytes = studies_api.get_study_pdf_v1(study_id=study.id)
                filename = f"{studies_dir}/study_{study.id}.pdf"
                with open(filename, "wb") as f:
                    f.write(pdf_bytes)
                study_count += 1
            except Exception as e:
                # Best-effort: report the failure and continue with the rest.
                print(f"  ✗ Failed to export study {study.id}: {e}")

        # A page shorter than the requested size is treated as the last one.
        if len(response.items) < page_size:
            break
        page += 1

    return study_count


def _export_summary_texts(summaries_api, project_id, summaries_dir, page_size):
    """Save the text export of every listed summary; return how many were saved."""
    page = 1
    summary_count = 0

    while True:
        # NOTE(review): other examples filter list_summaries_v1 by study_id;
        # confirm that the endpoint also accepts project_id.
        response = summaries_api.list_summaries_v1(
            project_id=project_id,
            page=page,
            page_size=page_size
        )

        for summary in response.items:
            try:
                export_data = summaries_api.export_summary_v1(
                    summary_id=summary.id
                )
                filename = f"{summaries_dir}/summary_{summary.id}.txt"
                # UTF-8 is set explicitly so the write does not depend on the
                # platform default encoding, which can reject non-ASCII text.
                with open(filename, "w", encoding="utf-8") as f:
                    f.write(export_data)
                summary_count += 1
            except Exception as e:
                # Best-effort: report the failure and continue with the rest.
                print(f"  ✗ Failed to export summary {summary.id}: {e}")

        # A page shorter than the requested size is treated as the last one.
        if len(response.items) < page_size:
            break
        page += 1

    return summary_count


def export_project_data(project_id, access_token, base_dir="exports", page_size=50):
    """Run the complete export pipeline for one project.

    Creates ``<base_dir>/project_<project_id>_<timestamp>/`` and fills it with
    three sub-directories: ``studies`` (one PDF per study), ``summaries``
    (one text file per summary) and ``files`` (downloaded via
    ``download_all_project_files``). Report PDFs are not exported here.
    Individual item failures are printed and skipped.

    Args:
        project_id: ID of the project to export.
        access_token: Access token passed to the SDK ``Configuration``.
        base_dir: Parent directory for the timestamped export directory.
        page_size: Number of studies/summaries requested per page.

    Returns:
        The path of the export directory that was created.
    """
    configuration = Configuration(
        host="https://api.syntheticusers.com/api/v1",
        access_token=access_token
    )

    # Create timestamped export directory so repeated runs never collide
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    export_dir = f"{base_dir}/project_{project_id}_{timestamp}"
    os.makedirs(export_dir, exist_ok=True)

    print(f"Starting export to {export_dir}/")

    with ApiClient(configuration) as api_client:
        studies_api = StudiesApi(api_client)
        summaries_api = SummariesApi(api_client)

        # 1. Export all studies as PDFs
        print("\n📄 Exporting studies...")
        studies_dir = f"{export_dir}/studies"
        os.makedirs(studies_dir, exist_ok=True)
        study_count = _export_study_pdfs(
            studies_api, project_id, studies_dir, page_size
        )
        print(f"  ✓ Exported {study_count} studies")

        # 2. Export all summaries
        print("\n📝 Exporting summaries...")
        summaries_dir = f"{export_dir}/summaries"
        os.makedirs(summaries_dir, exist_ok=True)
        summary_count = _export_summary_texts(
            summaries_api, project_id, summaries_dir, page_size
        )
        print(f"  ✓ Exported {summary_count} summaries")

        # 3. Download all files (must run while the client is still open)
        print("\n📎 Downloading files...")
        files_count = download_all_project_files(
            api_client,
            project_id,
            output_dir=f"{export_dir}/files"
        )

    print("\n✅ Export complete!")
    print(f"   Location: {export_dir}/")
    print(f"   Studies: {study_count}")
    print(f"   Summaries: {summary_count}")
    print(f"   Files: {files_count}")

    return export_dir

# Usage: the returned value is the path of the freshly created export directory
export_dir = export_project_data(project_id="your-project-id", access_token="your-access-token")

Scheduled Export Job

python
import schedule
import time

def scheduled_export_job():
    """Run one full project export, reporting a failure instead of raising.

    Any exception from the export is caught and printed, so an error in one
    run does not propagate to the caller.
    """
    started_at = datetime.now()
    print(f"Running scheduled export at {started_at}")

    job_settings = {
        "project_id": "your-project-id",
        "access_token": "your-access-token",
        "base_dir": "/backups/exports",
    }

    try:
        export_project_data(**job_settings)
    except Exception as exc:
        print(f"Export job failed: {exc}")

# Schedule export to run daily at 2 AM
schedule.every().day.at("02:00").do(scheduled_export_job)

print("Export scheduler started. Press Ctrl+C to exit.")
# Poll the scheduler forever, sleeping 60 seconds between checks; the loop
# has no exit condition of its own and only ends when the process is interrupted.
while True:
    schedule.run_pending()
    time.sleep(60)

Exporting to Different Formats

Convert exported data to various formats for analysis.

Export to JSON

python
import json
from syntheticusers import InterviewsApi, SyntheticUsersApi

def export_interviews_to_json(api_client, study_id, output_file="interviews.json", page_size=50):
    """Export all interviews of a study to a JSON file.

    Pages through ``list_interviews_v1``, fetches each interview with
    ``get_interview_v1`` and writes a JSON array with one object per
    interview (``id``, ``synthetic_user_id``, ``status``, ``created_at``).

    Args:
        api_client: Open ``ApiClient`` used to build the ``InterviewsApi``.
        study_id: ID of the study whose interviews are exported.
        output_file: Path of the JSON file to write.
        page_size: Number of interviews requested per page.
    """
    interviews_api = InterviewsApi(api_client)

    # Get all interviews
    interviews = []
    page = 1

    while True:
        response = interviews_api.list_interviews_v1(
            study_id=study_id,
            page=page,
            page_size=page_size
        )

        for interview in response.items:
            # Get full interview details
            full_interview = interviews_api.get_interview_v1(
                interview_id=interview.id
            )

            # A missing timestamp is exported as null rather than crashing
            # the whole export on ``None.isoformat()``.
            created_at = full_interview.created_at
            interviews.append({
                "id": full_interview.id,
                "synthetic_user_id": full_interview.synthetic_user_id,
                "status": full_interview.status,
                "created_at": created_at.isoformat() if created_at is not None else None,
                # Add other fields as needed
            })

        # A page shorter than the requested size is treated as the last one.
        if len(response.items) < page_size:
            break
        page += 1

    # Save as JSON; UTF-8 is set explicitly so the file does not depend on
    # the platform default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(interviews, f, indent=2)

    print(f"✓ Exported {len(interviews)} interviews to {output_file}")

# Usage: writes to the default "interviews.json"
with ApiClient(configuration) as api_client:
    export_interviews_to_json(api_client, study_id="your-study-id")

Export to CSV

python
import csv
from syntheticusers import ConversationsApi

def export_conversations_to_csv(api_client, study_id, output_file="conversations.csv", page_size=50):
    """Export the messages of all conversations in a study to a CSV file.

    Pages through ``list_conversations_v1``, fetches each conversation with
    ``get_conversation_v1`` and writes one CSV row per message, preceded by
    a header row.

    Args:
        api_client: Open ``ApiClient`` used to build the ``ConversationsApi``.
        study_id: ID of the study whose conversations are exported.
        output_file: Path of the CSV file to write.
        page_size: Number of conversations requested per page.
    """
    conversations_api = ConversationsApi(api_client)

    page = 1
    conversation_count = 0

    # newline="" is what the csv module expects for files it writes to;
    # UTF-8 is set explicitly so message text with non-ASCII characters does
    # not fail on platforms whose default encoding cannot represent it.
    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)

        # Write header
        writer.writerow([
            "conversation_id",
            "interview_id",
            "synthetic_user_id",
            "message",
            "role",
            "timestamp"
        ])

        while True:
            response = conversations_api.list_conversations_v1(
                study_id=study_id,
                page=page,
                page_size=page_size
            )

            for conv in response.items:
                # Get full conversation details
                full_conv = conversations_api.get_conversation_v1(
                    conversation_id=conv.id
                )

                # Write each message as a row.
                # Assumes each message is dict-like (supports .get) — TODO confirm.
                for message in full_conv.messages:
                    writer.writerow([
                        conv.id,
                        conv.interview_id,
                        conv.synthetic_user_id,
                        message.get("content", ""),
                        message.get("role", ""),
                        message.get("timestamp", "")
                    ])

                conversation_count += 1

            # A page shorter than the requested size is treated as the last one.
            if len(response.items) < page_size:
                break
            page += 1

    print(f"✓ Exported {conversation_count} conversations to {output_file}")

# Usage: writes to the default "conversations.csv"
with ApiClient(configuration) as api_client:
    export_conversations_to_csv(api_client, study_id="your-study-id")

Next Steps

Released under the MIT License.