To copy labels from one Project to another, both Projects must use the same Ontology and contain the same data units. Each data unit must have the same title in both Projects. If the Projects use different Datasets, make sure the data unit titles match exactly.

Use the following script to copy labels from a source Project to a target Project. Before running the script, ensure that you:

  • Replace <private_key_path> with the path to your private key.
  • Replace <source_project_id> with the Project ID of your source Project.
  • Replace <target_project_id> with the Project ID of your target Project.
  • Optionally set OVERWRITE_STRATEGY (default: add) to control what happens when a task in the target Project already has labels:
    • skip: Ignores tasks with existing labels.
    • add: Adds all labels from the source Project to the target Project for all tasks, keeping any labels that already exist in the target.
    • overwrite: Replaces labels in the target Project if they exist in the same data unit.
  • Optionally set COPY_METHOD to choose how data units are matched between the two Projects: CopyMethod.DataHash (default) or CopyMethod.DataTitle.
label_copy.py
import argparse
from enum import Enum
from encord import EncordUserClient
from tqdm import tqdm # Make sure tqdm is installed: pip install tqdm

class CopyMethod(Enum):
    """Attribute used to pair a target label row with its source label row."""

    # Pair rows whose data hash is identical in both projects.
    DataHash = 0
    # Pair rows whose data title is identical in both projects.
    DataTitle = 1

def get_empty_labels_flag(label_row):
    """Return True when the label row holds neither object nor classification instances."""
    # Objects are checked first; classifications are only queried when
    # there are no objects.
    if label_row.get_object_instances():
        return False
    if label_row.get_classification_instances():
        return False
    return True

def copy_labels_to_target(source_label_row, target_label_row):
    """Duplicate every source instance onto the target label row, then save the target.

    Object instances are added first, classification instances second. Each
    instance is added as a ``copy()`` of the source instance, not the source
    instance itself. The target row is saved once, after all instances have
    been added.
    """
    for source_object in source_label_row.get_object_instances():
        object_copy = source_object.copy()
        target_label_row.add_object_instance(object_copy)

    for source_classification in source_label_row.get_classification_instances():
        classification_copy = source_classification.copy()
        target_label_row.add_classification_instance(classification_copy)

    # Persist everything that was just added.
    target_label_row.save()

def main(
    keyfile: str,
    source_project_hash: str,
    target_project_hash: str,
    overwrite_strategy: str,
    copy_method: CopyMethod,
):
    """
    Copy labels from a source Encord project into a target Encord project.

    Every label row of the target project is paired with a source label row
    (by data hash or by data title). The source row's object and
    classification instances are then copied onto the target row according
    to ``overwrite_strategy``. Target rows with zero or several source
    matches are reported and skipped.

    Args:
        keyfile: Path to the SSH private key used to authenticate.
        source_project_hash: Hash (ID) of the project labels are copied from.
        target_project_hash: Hash (ID) of the project labels are copied into.
        overwrite_strategy: What to do when a target label row already has
            labels. ``"skip"`` leaves the row untouched, ``"add"`` adds the
            source labels alongside the existing ones, and ``"overwrite"``
            copies onto a target row loaded without its existing labels.
            Target rows without labels are always copied to.
        copy_method: ``CopyMethod.DataTitle`` matches rows by data title;
            any other value matches by data hash.

    Raises:
        ValueError: If ``overwrite_strategy`` is not one of the options above.
        Exception: If the two projects do not share an ontology, or the user
            declines one of the interactive confirmations.
    """
    # Validate the configuration first so a typo is reported before any
    # network request is made or the user is asked to confirm anything.
    if overwrite_strategy not in ("skip", "add", "overwrite"):
        raise ValueError(
            f"Invalid overwrite strategy: '{overwrite_strategy}'. "
            "Possible options are: 'skip', 'add', or 'overwrite'."
        )
    should_overwrite_target_labels = overwrite_strategy == "overwrite"

    user_client = EncordUserClient.create_with_ssh_private_key(
        ssh_private_key_path=keyfile
    )
    source_project = user_client.get_project(source_project_hash)
    target_project = user_client.get_project(target_project_hash)

    # --- Initial Project and Ontology Checks ---
    if source_project.ontology_hash != target_project.ontology_hash:
        raise Exception("Projects must share the same Ontology.")

    answer = input(f"Target project title is '{target_project.title}'. Continue (y/n)? ")
    if answer.strip().lower() != "y":
        raise Exception("Aborting: User cancelled project confirmation.")

    # --- Overwrite Confirmation if Applicable ---
    if should_overwrite_target_labels:
        answer = input(
            "WARNING: 'overwrite' will remove all current labels in the target label row. Continue (y/n)? "
        )
        if answer.strip().lower() != "y":
            raise Exception("Aborting: User cancelled overwrite confirmation.")

    # --- Label Copying Logic ---
    print("\nStarting label copy process...")
    for target_label_row in tqdm(target_project.list_label_rows_v2(), desc="Processing Label Rows"):
        data_title = target_label_row.data_title

        # Look up the source row(s) for this target row (one query per target row).
        if copy_method == CopyMethod.DataTitle:
            matches = list(source_project.list_label_rows_v2(data_title_eq=data_title))
        else:  # Default to DataHash
            matches = list(source_project.list_label_rows_v2(data_hashes=[target_label_row.data_hash]))

        if not matches:
            print(f"  Warning: No matching source label row found for '{data_title}' (method: {copy_method.name}). Skipping.")
            continue
        if len(matches) > 1:
            print(f"  Warning: Multiple matches found for '{data_title}' (method: {copy_method.name}). Skipping this target label row.")
            continue
        source_label_row_match = matches[0]

        # The target row is initialised exactly once: the SDK refuses to
        # re-initialise an already-initialised label row unless
        # `overwrite=True` is passed.
        if should_overwrite_target_labels:
            # Empty include-filters load the row without any existing
            # instances, so only the copied labels are present when it is saved.
            target_label_row.initialise_labels(
                include_object_feature_hashes=set(),
                include_classification_feature_hashes=set(),
            )
            print(f"  Overwriting labels for: {data_title}")
        else:
            target_label_row.initialise_labels()
            if get_empty_labels_flag(target_label_row):
                # Nothing to preserve, so 'skip' and 'add' behave the same here.
                print(f"  Copying labels to empty target: {data_title}")
            elif overwrite_strategy == "skip":
                print(f"  Skipping target data unit '{data_title}' (already has labels).")
                continue
            else:  # "add": keep existing target labels and add the source ones
                print(f"  Adding labels to existing target: {data_title}")

        # Source labels are only downloaded once we know they will be copied.
        source_label_row_match.initialise_labels()
        copy_labels_to_target(source_label_row_match, target_label_row)
    print("\nLabel copy process finished.")


if __name__ == "__main__":
    # ===== User configuration: edit the values below before running =====

    # Path to your Encord SSH private key file.
    KEYFILE_PATH = "<private_key_path>"

    # Hash of the project the labels are copied FROM.
    SOURCE_PROJECT_HASH = "<source_project_id>"

    # Hash of the project the labels are copied INTO.
    TARGET_PROJECT_HASH = "<target_project_id>"

    # How to treat target label rows that already contain labels:
    #   "skip"      - leave such rows untouched.
    #   "add"       - keep the existing labels and add the source labels to them.
    #   "overwrite" - clear the target labels before copying.
    OVERWRITE_STRATEGY = "add"

    # How source and target label rows are paired:
    #   CopyMethod.DataHash  - by data hash (recommended for exact dataset matches).
    #   CopyMethod.DataTitle - by data title (use if data hashes differ but titles are unique).
    COPY_METHOD = CopyMethod.DataHash

    # ===== End of configuration =====

    main(
        keyfile=KEYFILE_PATH,
        source_project_hash=SOURCE_PROJECT_HASH,
        target_project_hash=TARGET_PROJECT_HASH,
        overwrite_strategy=OVERWRITE_STRATEGY,
        copy_method=COPY_METHOD,
    )