Large CSV data export

1. Create Batch Export Functions

In this step, we define a function that retrieves every row of the transformation table as JSON, fetching the results in batches.

Python

def get_all_transform_results(transform_id, limit=250):
    """Fetch every result row of a transform, paging through the API in batches.

    Two names must be available at module scope when this is called: the
    ``requests`` library and ``YOUR_API_KEY`` (sent as the Authorization
    header).

    Args:
        transform_id: ID substituted into the results endpoint URL.
        limit: Page size sent as the ``limit`` query parameter. Must be >= 1.

    Returns:
        A ``(results, metadata)`` tuple. ``results`` is the concatenation of
        the ``"data"`` lists of all successful responses. ``metadata`` is the
        ``"metadata"`` object of the last successful response, or ``{}`` if
        no request succeeded or the response carried none.

    Raises:
        ValueError: If ``limit`` is less than 1.
    """
    # A non-positive limit can never satisfy the "short page" stop condition
    # below, so reject it up front instead of risking an endless loop.
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    url = "https://api.runtrellis.com/v1/transforms/{}/results".format(transform_id)
    headers = {
        "Authorization": YOUR_API_KEY,
        "Content-Type": "application/json",
    }
    offset = 0
    all_results = []
    # Tracked separately so a failure on the very first request still returns
    # a well-defined (empty) metadata object.
    metadata = {}

    while True:
        print("Processing batch", offset)
        response = requests.post(
            url,
            json={},
            headers=headers,
            params={"limit": limit, "offset": offset},
        )

        if response.status_code != 200:
            # Report the failure and return whatever was collected so far.
            print(f"Error: {response.status_code}")
            print(response.text)
            break

        data = response.json()
        results = data.get('data', [])
        all_results.extend(results)
        metadata = data.get('metadata', {})

        # A page shorter than the limit means there is nothing left to fetch.
        if len(results) < limit:
            break

        offset += limit

    return all_results, metadata

2. Map JSON results to a DataFrame

In this section, we map the JSON results to a pandas DataFrame whose columns match the table view in the UI.

Python

import pandas as pd
def process_results_to_csv(results, metadata):
    """Build a DataFrame from result rows, renaming columns via the metadata.

    Despite the name, this does not write a file; it returns the DataFrame
    for the caller to export.

    Args:
        results: List of row dicts, one per result.
        metadata: Dict that may contain ``"column_definitions"``, a list of
            dicts with ``"id"`` and ``"name"`` keys. Missing or empty
            metadata means no columns are renamed.

    Returns:
        A pandas DataFrame with one row per result. Columns whose label
        matches a column-definition ``id`` are renamed to that definition's
        ``name``; all other columns keep their original label.
    """
    # Tolerate absent metadata (e.g. ``{}`` from a failed fetch) instead of
    # raising KeyError.
    column_definitions = (metadata or {}).get('column_definitions') or []

    # Create op_id to name mapping
    op_mapping = {col['id']: col['name'] for col in column_definitions}

    # Convert results to DataFrame
    df = pd.DataFrame(results)

    # rename() leaves labels that are absent from the mapping untouched.
    return df.rename(columns=op_mapping)

3. Get the full results

Python

# Usage: fetch all rows for one transformation and save them as a dated CSV.
from datetime import datetime

YOUR_TRANSFORMATION_ID = "YOUR_TRANSFORMATION_ID"
YOUR_API_KEY = "YOUR_API_KEY"

results, metadata = get_all_transform_results(YOUR_TRANSFORMATION_ID)
print(f"Total results retrieved: {len(results)}")

# Convert to CSV
df = process_results_to_csv(results, metadata)
current_date = datetime.now().strftime('%Y_%m_%d')
# Build the path once so the saved file and the confirmation message agree.
output_path = f"trellis_transform_{YOUR_TRANSFORMATION_ID}_results_{current_date}.csv"
df.to_csv(output_path, index=False)
print(f"Results saved to {output_path}")

Documentation

For enterprises

Large CSV data export