Get in touch

Get in touch

Prefer using email? Say hi at hello@moveshelf.com

This section explains how to efficiently retrieve data files for all subjects in a Moveshelf project using the Moveshelf API. The key points for speeding up execution in this use case are:
  • Reduce the number of API calls to retrieve data
  • Download data in parallel
Prerequisites
Before implementing this example, ensure that your processing script includes all necessary setup steps. In particular, you should have:
Implementation
To retrieve the additional data files for all subjects in a project, you can use the following script:

import os, sys, json
parent_folder = os.path.dirname(os.path.dirname(__file__))
sys.path.append(parent_folder)
from moveshelf_api import util
import requests
from api.api import MoveshelfApiCustomized
from concurrent.futures import ThreadPoolExecutor

# Use a single requests.Session for connection pooling, so HTTP connections
# are reused across downloads instead of being re-established for every file.
# NOTE(review): this session is shared by the worker threads of the parallel
# download further down — confirm that sharing one session across threads is
# acceptable for the installed requests version.
requests_session = requests.Session()

def download_with_session(url):
    """Download and parse the JSON file at *url* through the shared session.

    Single-argument adapter around ``download_json_file`` so it can be handed
    to ``executor.map``; returns whatever ``download_json_file`` returns.
    """
    shared_session = requests_session
    return download_json_file(url, session=shared_session)

def download_json_file(url, session=None, timeout=60):
    """Download the file at *url* and parse it as JSON.

    Args:
        url: Address of the JSON file to download.
        session: Optional object whose ``get`` method performs the request
            (e.g. a ``requests.Session``). ``requests.get`` is used when it
            is omitted.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block a worker indefinitely.

    Returns:
        The parsed JSON content, or ``None`` when the download, the HTTP
        status check, or the parsing failed (the failure is printed).
    """
    try:
        http_get = session.get if session is not None else requests.get
        response = http_get(url, timeout=timeout)
        # Reject 4xx/5xx answers instead of parsing an error body as data.
        response.raise_for_status()
        # Parsing the raw bytes lets json detect the encoding itself,
        # including a leading UTF-8 byte-order mark.
        return json.loads(response.content)
    except Exception as e:  # best effort: one bad file must not abort the batch
        print(f"Failed to download or parse {url}: {e}")
        return None
   
## Set up the API
# Load the personal configuration from parent_folder (the parent of this
# script's directory).
personal_config = os.path.join(parent_folder, "mvshlf-config.json")
if not os.path.isfile(personal_config):
    raise FileNotFoundError(
        f"Configuration file '{personal_config}' is missing.\n"
        "Ensure the file exists with the correct name and path."
    )

# Read the JSON config as UTF-8 explicitly rather than relying on the
# platform's default encoding.
with open(personal_config, "r", encoding="utf-8") as config_file:
    data = json.load(config_file)

# The config supplies the API key file name (resolved against parent_folder)
# and the API URL.
api = MoveshelfApiCustomized(
    api_key_file=os.path.join(parent_folder, data["apiKeyFileName"]),
    api_url=data["apiUrl"],
)

## Get available projects
projects = api.getUserProjects()
projectNames = [project["name"] for project in projects]

my_project = "<organizationName/projectName>"  # e.g. support/demoProject
try:
    idx_my_project = projectNames.index(my_project)
except ValueError:
    # Same exception type as list.index, but tell the user what went wrong
    # and which project names are actually available.
    raise ValueError(
        f"Project '{my_project}' not found. Available projects: {projectNames}"
    ) from None
my_project_id = projects[idx_my_project]["id"]
fileExtensionToDownload = ".json"  # Only download json files

# Fetch the details of all subjects in the selected project; the result is
# iterated afterwards for sessions, clips and their additional data.
all_subject_details = api.getProjectSubjectsWithAdditionalData(my_project_id)

## Collect the download URL and target file path of every matching file
# URLs[i] and file_paths[i] always describe the same additional-data entry.
# NOTE(review): the extension is matched against the download URI rather than
# originalFileName — confirm the URI never carries a query string.
URLs = []
file_paths = []
for subject in all_subject_details:
    # Flatten the subject's sessions into one stream of clips.
    subject_clips = (
        clip
        for subject_session in subject.get("sessions", [])
        for clip in subject_session.get("clips", [])
    )
    for clip in subject_clips:
        for entry in clip.get("additionalData", []):
            download_uri = entry["originalDataDownloadUri"]
            if not download_uri.endswith(fileExtensionToDownload):
                continue
            URLs.append(download_uri)
            file_paths.append(f'{clip["projectPath"]}{clip["title"]}/{entry["originalFileName"]}')

# Download additional data in parallel
# One task is submitted per URL and the results are collected in submission
# order, so additional_data[i] is the result for URLs[i].
with ThreadPoolExecutor(max_workers=5) as executor:
    pending = [executor.submit(download_with_session, url) for url in URLs]
    additional_data = [future.result() for future in pending]