Get Started
- Global and US Encord Platforms
- 1. Prerequisites and Installation
- 2. Register Cloud Data
- 3. Set Up Your Project and Team
- Export Labels
General
Index
Projects
Labels
- Working with Labels
- Delete Labels/Classifications
- Label / Activity logs
- Bitmasks
- Audio Labels and Classifications
- HTML Files and Labels
- Text Files and Labels
- PDF Labels and Classifications
- Import Labels/Annotations
- Import Labels/Annotations to Consensus Branches
- Import COCO Labels/Annotations
- Copy labels between Projects
Datasets
Labels
Copy labels between Projects
To copy labels from one Project to another, both Projects must contain the same data unit. The data unit must have the same title in both Projects. If the Projects use different Datasets, make sure the data unit names match exactly.
Use the following script to copy labels from a source Project to a target Project. Before running the script ensure that you:
- Replace `<private_key_path>` with the path to your private key.
- Replace `<source_project_id>` with the Project ID of your source Project.
- Replace `<target_project_id>` with the Project ID of your target Project.
- Optionally set the `OVERWRITE_STRATEGY` to control how existing labels in the target Project are handled:
  - `skip`: Ignores tasks with existing labels.
  - `add`: Adds all labels from the source Project to the target Project for all tasks.
  - `overwrite`: Replaces labels in the target Project if they exist in the same data unit.
- Optionally set the `COPY_METHOD` to specify the basis for copying: `DataHash` or `DataTitle`.
label_copy.py
import argparse
from enum import Enum
from encord import EncordUserClient
from tqdm import tqdm # Make sure tqdm is installed: pip install tqdm
class CopyMethod(Enum):
    """Attribute used to pair a target label row with a source label row."""

    # Pair label rows whose data units share the same data hash.
    DataHash = 0
    # Pair label rows whose data units share the same data title.
    DataTitle = 1
def get_empty_labels_flag(label_row):
    """Return True when the label row has no object and no classification instances."""
    # Any object instance is enough to call the row non-empty; in that case the
    # classification instances are never queried.
    if label_row.get_object_instances():
        return False
    return not label_row.get_classification_instances()
def copy_labels_to_target(source_label_row, target_label_row):
    """Add a copy of every source instance to the target label row, then save it.

    Object instances are transferred first, classification instances second.
    Each instance is duplicated with its own ``copy()`` before being added, so
    the source label row's instances are never attached to the target.
    """
    # Each pair is (how to list instances on the source, how to add one to the target).
    transfers = (
        (
            source_label_row.get_object_instances,
            target_label_row.add_object_instance,
        ),
        (
            source_label_row.get_classification_instances,
            target_label_row.add_classification_instance,
        ),
    )
    for list_instances, add_instance in transfers:
        for instance in list_instances():
            add_instance(instance.copy())

    # Persist everything that was just added to the target label row.
    target_label_row.save()
def main(
    keyfile: str,
    source_project_hash: str,
    target_project_hash: str,
    overwrite_strategy: str,
    copy_method: CopyMethod,
) -> None:
    """Copy labels from a source Encord Project into a target Encord Project.

    Every label row of the target Project is paired with a label row of the
    source Project (by data hash or by data title, depending on
    ``copy_method``). Target rows with no match, or with more than one match,
    are reported and skipped. For each paired row the source instances are
    copied according to ``overwrite_strategy``.

    The function is interactive: it asks for confirmation of the target
    Project on stdin, and asks again before running with ``"overwrite"``.

    Args:
        keyfile: Path to the SSH private key used to authenticate.
        source_project_hash: ID of the Project to copy labels from.
        target_project_hash: ID of the Project to copy labels into.
        overwrite_strategy: One of ``"skip"`` (leave target rows that already
            have labels untouched), ``"add"`` (add the source labels next to
            the existing ones) or ``"overwrite"`` (replace the target labels).
        copy_method: Attribute used to pair source and target label rows.

    Raises:
        ValueError: If ``overwrite_strategy`` is not a supported value.
        Exception: If the two Projects use different Ontologies, or if the
            user declines one of the confirmation prompts.
    """
    # Validate the purely local argument first so that a typo fails before any
    # network request is made or any prompt is shown.
    if overwrite_strategy not in ("skip", "add", "overwrite"):
        raise ValueError(
            f"Invalid overwrite strategy: '{overwrite_strategy}'. "
            f"Possible options are: 'skip', 'add', or 'overwrite'."
        )

    user_client = EncordUserClient.create_with_ssh_private_key(
        ssh_private_key_path=keyfile
    )
    source_project = user_client.get_project(source_project_hash)
    target_project = user_client.get_project(target_project_hash)

    # --- Initial Project and Ontology Checks ---
    if source_project.ontology_hash != target_project.ontology_hash:
        raise Exception("Projects must share the same Ontology.")

    answer = input(f"Target project title is '{target_project.title}'. Continue (y/n)? ")
    if answer.lower() != "y":
        raise Exception("Aborting: User cancelled project confirmation.")

    # --- Overwrite Confirmation if Applicable ---
    should_overwrite_target_labels = overwrite_strategy == "overwrite"
    if should_overwrite_target_labels:
        answer = input(
            "WARNING: 'overwrite' will remove all current labels in the target label row. Continue (y/n)? "
        )
        if answer.lower() != "y":
            raise Exception("Aborting: User cancelled overwrite confirmation.")

    # --- Label Copying Logic ---
    print("\nStarting label copy process...")

    match_on_title = copy_method == CopyMethod.DataTitle  # otherwise match on data hash

    # List the source Project once and index it by the match key, instead of
    # querying the source Project again for every target label row.
    source_rows_by_key = {}
    for source_label_row in source_project.list_label_rows_v2():
        key = source_label_row.data_title if match_on_title else source_label_row.data_hash
        source_rows_by_key.setdefault(key, []).append(source_label_row)

    for target_label_row in tqdm(target_project.list_label_rows_v2(), desc="Processing Label Rows"):
        key = target_label_row.data_title if match_on_title else target_label_row.data_hash
        matches = source_rows_by_key.get(key, [])

        if len(matches) > 1:
            print(f" Warning: Multiple matches found for '{target_label_row.data_title}' (method: {copy_method.name}). Skipping this target label row.")
            continue
        if not matches:
            print(f" Warning: No matching source label row found for '{target_label_row.data_title}' (method: {copy_method.name}). Skipping.")
            continue
        source_label_row_match = matches[0]

        # Initialise the target exactly once. Initialising it a second time
        # without `overwrite=True` is rejected by the SDK, which made the
        # 'overwrite' strategy fail on the first matched row.
        if should_overwrite_target_labels:
            # Load the target with empty feature-hash filters so that none of
            # its existing instances are loaded; saving afterwards then leaves
            # only the copied instances (per the SDK's documented filter
            # semantics - verify against the installed SDK version).
            target_label_row.initialise_labels(
                include_object_feature_hashes=set(),
                include_classification_feature_hashes=set(),
            )
            print(f" Overwriting labels for: {target_label_row.data_title}")
        else:
            target_label_row.initialise_labels()
            if get_empty_labels_flag(target_label_row):
                # Target is empty, so both 'skip' and 'add' copy the labels.
                print(f" Copying labels to empty target: {target_label_row.data_title}")
            elif overwrite_strategy == "skip":
                print(f" Skipping target data unit '{target_label_row.data_title}' (already has labels).")
                continue
            else:
                # 'add': existing target instances are kept and the source
                # instances are added alongside them.
                print(f" Adding labels to existing target: {target_label_row.data_title}")

        # Load the source labels only once a copy is certain, so skipped rows
        # cost no extra download.
        source_label_row_match.initialise_labels()
        copy_labels_to_target(source_label_row_match, target_label_row)

    print("\nLabel copy process finished.")
if __name__ == "__main__":
    # ===== User configuration: edit the values below before running =====

    # Path to the SSH private key used to authenticate with Encord.
    KEYFILE_PATH = "<private_key_path>"

    # Project to read labels from.
    SOURCE_PROJECT_HASH = "<source_project_id>"

    # Project to write labels into.
    TARGET_PROJECT_HASH = "<target_project_id>"

    # What to do when a target label row already has labels:
    #   "skip"      - leave that label row untouched.
    #   "add"       - keep the existing labels and add the source labels to them.
    #   "overwrite" - clear the target labels, then copy the source labels.
    OVERWRITE_STRATEGY = "add"

    # How source and target label rows are paired:
    #   CopyMethod.DataHash  - by data hash (recommended for exact dataset matches).
    #   CopyMethod.DataTitle - by data title (use when the data hashes differ
    #                          but the titles are unique).
    COPY_METHOD = CopyMethod.DataHash

    # ===== End of configuration =====

    main(
        keyfile=KEYFILE_PATH,
        source_project_hash=SOURCE_PROJECT_HASH,
        target_project_hash=TARGET_PROJECT_HASH,
        overwrite_strategy=OVERWRITE_STRATEGY,
        copy_method=COPY_METHOD,
    )
Was this page helpful?
Assistant
Responses are generated using AI and may contain mistakes.