Import Text Regions to Text Files
A single text region object label can be added at a character range between `start` and `end` using `range`.
Text Region Ontology

Copy
from encord import EncordUserClient, Project
from encord.objects import Object, ObjectInstance
from encord.objects.coordinates import TextCoordinates
from encord.objects.frames import Range
from encord.objects.attributes import RadioAttribute, TextAttribute, Option
# Script: attach "Edit" text-region labels, each with an English-correction
# answer, to character ranges of text files in an Encord Project.

# User input
SSH_PATH = "/Users/chris-encord/ssh-private-key.txt"  # Replace with the file path to your SSH private key
PROJECT_ID = "00000000-0000-0000-0000-000000000000"  # Replace with the unique Project ID
BUNDLE_SIZE = 100  # Passed as bundle_size to project.create_bundle()

# Authorize connection to Encord
user_client: EncordUserClient = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH,
    # For US platform users use domain="https://api.us.encord.com"
    domain="https://api.encord.com",
)

# Get project
project: Project = user_client.get_project(PROJECT_ID)
assert project is not None, f"Project with ID {PROJECT_ID} not found."

# Ontology verification: resolve every ontology node used below up front,
# so a missing or misnamed node fails before any label is created.
ontology_structure = project.ontology_structure
assert ontology_structure is not None, "Ontology structure not found in the project."

text_object: Object = ontology_structure.get_child_by_title(title="Edit", type_=Object)
assert text_object is not None, "Ontology object for 'Edit' not found."

correction_radio_attribute = ontology_structure.get_child_by_title(
    type_=RadioAttribute, title="Corrections"
)
assert correction_radio_attribute is not None, "Radio attribute 'Corrections' not found."

english_correction_option = correction_radio_attribute.get_child_by_title(
    type_=Option, title="English corrections"
)
assert english_correction_option is not None, "Option 'English corrections' not found under 'Corrections'."

english_correction_text_attribute = english_correction_option.get_child_by_title(
    type_=TextAttribute, title="Correction text"
)
assert english_correction_text_attribute is not None, (
    "Text attribute 'Correction text' not found under 'English corrections'."
)

# Labels: data unit title -> list of text regions to add.
# "coordinates" is the character range of the region; "label_ref" is only
# used in the progress messages printed below.
text_annotations = {
    "paradise-lost.txt": [
        {
            "label_ref": "text_region_001",
            "coordinates": Range(start=5000, end=5050),
            "correction_text": "This needs to be updated for clarity.",
        },
        {
            "label_ref": "text_region_002",
            "coordinates": Range(start=6000, end=6050),
            "correction_text": "Rephrase for better readability.",
        },
    ],
    "War and Peace.txt": [
        {
            "label_ref": "text_region_003",
            "coordinates": Range(start=3000, end=3050),
            "correction_text": "Grammar correction required.",
        },
        {
            "label_ref": "text_region_004",
            "coordinates": Range(start=4000, end=4050),
            "correction_text": "Check for historical accuracy.",
        },
    ],
}

# Initialize label rows (bundled). Titles with no matching label row are
# skipped here and reported again in the apply step.
label_row_map = {}
with project.create_bundle(bundle_size=BUNDLE_SIZE) as bundle:
    for data_title in text_annotations:
        label_rows = project.list_label_rows_v2(data_title_eq=data_title)
        if not label_rows:
            print(f"Skipping: No label row found for {data_title}")
            continue
        lr = label_rows[0]
        lr.initialise_labels(bundle=bundle)
        label_row_map[data_title] = lr

# Apply labels
label_rows_to_save = []
for data_title, annotations in text_annotations.items():
    lr = label_row_map.get(data_title)
    if lr is None:
        print(f"Skipping: No initialized label row found for {data_title}")
        continue

    for ann in annotations:
        # One object instance per text region, placed on frame 0.
        coord = TextCoordinates(range=[ann["coordinates"]])
        inst: ObjectInstance = text_object.create_instance()
        inst.set_for_frames(frames=0, coordinates=coord)

        # Select the radio option first, then fill in its nested text attribute.
        inst.set_answer(attribute=correction_radio_attribute, answer=english_correction_option)
        inst.set_answer(attribute=english_correction_text_attribute, answer=ann["correction_text"])

        lr.add_object_instance(inst)
        print(f"Added [English correction] text region {ann['label_ref']} to {data_title}")

    label_rows_to_save.append(lr)

# Save label rows (bundled)
with project.create_bundle(bundle_size=BUNDLE_SIZE) as bundle:
    for lr in label_rows_to_save:
        lr.save(bundle=bundle)
        print(f"Saved label row for {lr.data_title}")

print("English correction labels applied.")
Import Classifications to Text Files
The example for the Classification uses nested attributes with the Ontology structure as follows:
- Accurate?
- Yes
- No
- Correction (text field to provide edits for the correction)
`create_instance` must use `range_only=True` for text documents. This includes HTML documents.
Copy
# Import dependencies
from typing import List
from pathlib import Path
from encord import EncordUserClient, Project
from encord.objects.frames import Range
from encord.objects import LabelRowV2, Classification, Option, OntologyStructure
# Script: add a radio-button classification to a text/HTML file in an
# Encord Project and save it.

SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"

# Create user client from the contents of the SSH private key file
user_client: EncordUserClient = EncordUserClient.create_with_ssh_private_key(
    Path(SSH_PATH).read_text()
)

# Get project for which predictions are to be added
project: Project = user_client.get_project(PROJECT_ID)

# Specify the data unit to apply classification
matching_label_rows = project.list_label_rows_v2(
    data_title_eq="<file-name-for-text-file>.html"
)
if not matching_label_rows:
    # Fail with a clear message instead of a bare IndexError on [0].
    raise ValueError("No label row found for the requested data title.")
label_row = matching_label_rows[0]

# Download the existing labels
label_row.initialise_labels()

# Get the Ontology structure
ontology_structure: OntologyStructure = label_row.ontology_structure

# Assume that the following radio button classification exists in the Ontology.
radio_ontology_classification: Classification = (
    ontology_structure.get_child_by_title(
        title="<classification-name>", type_=Classification
    )
)
radio_classification_option = radio_ontology_classification.get_child_by_title(
    title="<option-name>",
    type_=Option,
)

# Create classification instance. `range_only=True` is required for HTML documents
radio_classification_instance = radio_ontology_classification.create_instance(range_only=True)

# Set the answer of the classification instance
radio_classification_instance.set_answer(radio_classification_option)

# Select the frames where the classification instance is present
radio_classification_instance.set_for_frames(frames=0)

# Add it to the label row
label_row.add_classification_instance(radio_classification_instance)

# Save labels
label_row.save()
Export Labels for Text Files
Copy
# Import dependencies
from encord import EncordUserClient
import json
# Script: print all labels of one text/HTML file in an Encord Project as JSON.

SSH_PATH = "<file-path-to-ssh-private-key>"  # Replace with the path to the file containing your private key
PROJECT_ID = "<project-unique-id>"  # Replace with the ID of the Project you want to export labels for
DATA_UNIT_NAME = "<file-name-of-html-file>"  # Replace with the name of your specific data unit

# Instantiate client from the SSH private key file.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify the Project to export labels from.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit you want to export labels for.
matching_label_rows = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)
if not matching_label_rows:
    # Fail with a clear message instead of a bare IndexError on [0].
    raise ValueError(f"No label row found for data unit '{DATA_UNIT_NAME}'.")
specific_label_row = matching_label_rows[0]

# Download label information for the specific data unit
specific_label_row.initialise_labels()

# Print the labels as JSON
print(json.dumps(specific_label_row.to_encord_dict()))
Remove Labels from Text Files
Copy
from encord import EncordUserClient
import json
# Script: remove a single object label, identified by its object hash, from
# one text/HTML file in an Encord Project.

SSH_PATH = "<file-path-to-ssh-private-key>"  # Replace with the path to the file containing your private key
PROJECT_ID = "<project-unique-id>"  # Replace with the ID of the Project containing the label
DATA_UNIT_NAME = "<file-name-of-html-file>"  # Replace with the name of your specific data unit
LABEL_ID = "<label-unique-id>"  # Replace with the object hash of the label to remove

# Instantiate client from the SSH private key file.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify the Project to remove the label from.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit the label belongs to.
matching_label_rows = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)
if not matching_label_rows:
    # Fail with a clear message instead of a bare IndexError on [0].
    raise ValueError(f"No label row found for data unit '{DATA_UNIT_NAME}'.")
specific_label_row = matching_label_rows[0]

# Download the existing labels so the object instances can be inspected.
specific_label_row.initialise_labels()

# Find the object instance whose hash matches the requested label.
object_to_remove = next(
    (
        object_instance
        for object_instance in specific_label_row.get_object_instances()
        if object_instance.object_hash == LABEL_ID
    ),
    None,
)
if object_to_remove is None:
    # Do not call remove_object(None) or save an unchanged row.
    raise ValueError(
        f"No object with hash '{LABEL_ID}' found in data unit '{DATA_UNIT_NAME}'."
    )

# Remove the label and persist the change.
specific_label_row.remove_object(object_to_remove)
specific_label_row.save()