Skip to content

Commit

Permalink
fix: LEAP-1692: Image export for COCO and YOLO (#6855)
Browse files Browse the repository at this point in the history
Co-authored-by: Sergei Ivashchenko <[email protected]>
Co-authored-by: triklozoid <[email protected]>
Co-authored-by: MihajloHoma <[email protected]>
  • Loading branch information
4 people authored Jan 21, 2025
1 parent 40f156a commit b38000e
Show file tree
Hide file tree
Showing 7 changed files with 529 additions and 70 deletions.
9 changes: 6 additions & 3 deletions label_studio/data_export/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def get(self, request, *args, **kwargs):
logger.debug('Prepare export files')

export_file, content_type, filename = DataExport.generate_export_file(
project, tasks, export_type, download_resources, request.GET
project, tasks, export_type, download_resources, request.GET, hostname=request.build_absolute_uri('/')
)

r = FileResponse(export_file, as_attachment=True, content_type=content_type, filename=filename)
Expand Down Expand Up @@ -569,7 +569,7 @@ def get(self, request, *args, **kwargs):
return response


def async_convert(converted_format_id, export_type, project, **kwargs):
def async_convert(converted_format_id, export_type, project, hostname, download_resources=False, **kwargs):
with transaction.atomic():
try:
converted_format = ConvertedFormat.objects.get(id=converted_format_id)
Expand All @@ -583,7 +583,7 @@ def async_convert(converted_format_id, export_type, project, **kwargs):
converted_format.save(update_fields=['status'])

snapshot = converted_format.export
converted_file = snapshot.convert_file(export_type)
converted_file = snapshot.convert_file(export_type, download_resources=download_resources, hostname=hostname)
if converted_file is None:
raise ValidationError('No converted file found, probably there are no annotations in the export snapshot')
md5 = Export.eval_md5(converted_file)
Expand Down Expand Up @@ -645,6 +645,7 @@ def post(self, request, *args, **kwargs):
serializer = ExportConvertSerializer(data=request.data, context={'project': snapshot.project})
serializer.is_valid(raise_exception=True)
export_type = serializer.validated_data['export_type']
download_resources = serializer.validated_data.get('download_resources')

with transaction.atomic():
converted_format, created = ConvertedFormat.objects.get_or_create(export=snapshot, export_type=export_type)
Expand All @@ -657,6 +658,8 @@ def post(self, request, *args, **kwargs):
converted_format.id,
export_type,
snapshot.project,
request.build_absolute_uri('/'),
download_resources=download_resources,
on_failure=set_convert_background_failure,
)
return Response({'export_type': export_type, 'converted_format': converted_format.id})
10 changes: 8 additions & 2 deletions label_studio/data_export/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ def _get_export_serializer_option(serialization_options):
options['context']['interpolate_key_frames'] = serialization_options['interpolate_key_frames']
if serialization_options.get('include_annotation_history') is False:
options['omit'] = ['annotations.history']
# pass the download_resources flag through to the export options
if serialization_options.get('download_resources') is True:
options['download_resources'] = True
return options

def get_task_queryset(self, ids, annotation_filter_options):
Expand Down Expand Up @@ -303,7 +306,7 @@ def run_file_exporting(self, task_filter_options=None, annotation_filter_options
serialization_options=serialization_options,
)

def convert_file(self, to_format):
def convert_file(self, to_format, download_resources=False, hostname=None):
with get_temp_dir() as tmp_dir:
OUT = 'out'
out_dir = pathlib.Path(tmp_dir) / OUT
Expand All @@ -313,7 +316,10 @@ def convert_file(self, to_format):
config=self.project.get_parsed_config(),
project_dir=None,
upload_dir=out_dir,
download_resources=False,
download_resources=download_resources,
# for downloading resources we need access to the API
access_token=self.project.organization.created_by.auth_token.key,
hostname=hostname,
)
input_name = pathlib.Path(self.file.name).name
input_file_path = pathlib.Path(tmp_dir) / input_name
Expand Down
4 changes: 3 additions & 1 deletion label_studio/data_export/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def get_export_formats(project):
return sorted(formats, key=lambda f: f.get('disabled', False))

@staticmethod
def generate_export_file(project, tasks, output_format, download_resources, get_args):
def generate_export_file(project, tasks, output_format, download_resources, get_args, hostname=None):
"""Generate export file and return it as an open file object.
Be sure to close the file after using it, to avoid wasting disk space.
Expand All @@ -161,6 +161,8 @@ def generate_export_file(project, tasks, output_format, download_resources, get_
project_dir=None,
upload_dir=os.path.join(settings.MEDIA_ROOT, settings.UPLOAD_DIR),
download_resources=download_resources,
access_token=project.organization.created_by.auth_token.key,
hostname=hostname,
)
with get_temp_dir() as tmp_dir:
converter.convert(input_json, tmp_dir, output_format, is_dir=False)
Expand Down
1 change: 1 addition & 0 deletions label_studio/data_export/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ class SerializationOption(serializers.Serializer):

class ExportConvertSerializer(serializers.Serializer):
export_type = serializers.CharField(help_text='Export file format.')
download_resources = serializers.BooleanField(help_text='Download resources in converter.', required=False)

def validate_export_type(self, value):
project = self.context.get('project')
Expand Down
161 changes: 105 additions & 56 deletions label_studio/tests/export.tavern.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,67 +112,116 @@ stages:
url: '{django_live_url}/api/projects/{pk}/export/formats'
response:
json:
- title: 'JSON'
description: !anystr
link: 'https://labelstud.io/guide/export.html#JSON'
name: 'JSON'
- title: 'JSON-MIN'
description: !anystr
link: 'https://labelstud.io/guide/export.html#JSON-MIN'
name: 'JSON_MIN'
- title: 'CSV'
description: !anystr
link: 'https://labelstud.io/guide/export.html#CSV'
name: 'CSV'
- title: 'TSV'
description: !anystr
link: 'https://labelstud.io/guide/export.html#TSV'
name: 'TSV'
- title: 'COCO'
description: 'Popular machine learning format used by the COCO dataset for object detection and image segmentation tasks with polygons and rectangles.'
link: 'https://labelstud.io/guide/export.html#COCO'
tags: ['image segmentation', 'object detection']
name: 'COCO'
- title: 'YOLO'
description: 'Popular TXT format is created for each image file. Each txt file contains annotations for the corresponding image file, that is object class, object coordinates, height & width.'
link: 'https://labelstud.io/guide/export.html#YOLO'
tags: ['image segmentation', 'object detection']
name: 'YOLO'
- title: 'YOLOv8 OBB'
description: 'Popular TXT format is created for each image file. Each txt file contains annotations for the corresponding image file. The YOLO OBB format designates bounding boxes by their four corner points with coordinates normalized between 0 and 1, so it is possible to export rotated objects.'
link: 'https://labelstud.io/guide/export.html#YOLO'
tags: ['image segmentation', 'object detection']
name: 'YOLO_OBB'
- title: 'CONLL2003'
description: 'Popular format used for the CoNLL-2003 named entity recognition challenge.'
link: 'https://labelstud.io/guide/export.html#CONLL2003'
tags: ['sequence labeling', 'text tagging', 'named entity recognition']
name: 'CONLL2003'
- title: JSON
description: List of items in raw JSON format stored in one JSON file. Use to export
both the data and the annotations for a dataset. It's Label Studio Common Format
link: https://labelstud.io/guide/export.html#JSON
name: JSON
- title: JSON-MIN
description: List of items where only "from_name", "to_name" values from the raw
JSON format are exported. Use to export only the annotations for a dataset.
link: https://labelstud.io/guide/export.html#JSON-MIN
name: JSON_MIN
- title: CSV
description: Results are stored as comma-separated values with the column names
specified by the values of the "from_name" and "to_name" fields.
link: https://labelstud.io/guide/export.html#CSV
name: CSV
- title: TSV
description: Results are stored in tab-separated tabular file with column names
specified by "from_name" "to_name" values
link: https://labelstud.io/guide/export.html#TSV
name: TSV
- title: COCO
description: Popular machine learning format used by the COCO dataset for object
detection and image segmentation tasks with polygons and rectangles.
link: https://labelstud.io/guide/export.html#COCO
tags:
- image segmentation
- object detection
name: COCO
- title: COCO with Images
description: COCO format with images downloaded.
link: https://labelstud.io/guide/export.html#COCO
tags:
- image segmentation
- object detection
name: COCO_WITH_IMAGES
- title: YOLO
description: Popular TXT format is created for each image file. Each txt file contains
annotations for the corresponding image file, that is object class, object coordinates,
height & width.
link: https://labelstud.io/guide/export.html#YOLO
tags:
- image segmentation
- object detection
name: YOLO
- title: YOLO with Images
description: YOLO format with images downloaded.
link: https://labelstud.io/guide/export.html#YOLO
tags:
- image segmentation
- object detection
name: YOLO_WITH_IMAGES
- title: YOLOv8 OBB
description: Popular TXT format is created for each image file. Each txt file contains
annotations for the corresponding image file. The YOLO OBB format designates bounding
boxes by their four corner points with coordinates normalized between 0 and 1,
so it is possible to export rotated objects.
link: https://labelstud.io/guide/export.html#YOLO
tags:
- image segmentation
- object detection
name: YOLO_OBB
- title: YOLOv8 OBB with Images
description: YOLOv8 OBB format with images downloaded.
link: https://labelstud.io/guide/export.html#YOLO
tags:
- image segmentation
- object detection
name: YOLO_OBB_WITH_IMAGES
- title: CONLL2003
description: Popular format used for the CoNLL-2003 named entity recognition challenge.
link: https://labelstud.io/guide/export.html#CONLL2003
tags:
- sequence labeling
- text tagging
- named entity recognition
name: CONLL2003
disabled: true
- title: 'Pascal VOC XML'
description: 'Popular XML format used for object detection and polygon image segmentation tasks.'
link: 'https://labelstud.io/guide/export.html#Pascal-VOC-XML'
tags: ['image segmentation', 'object detection']
name: 'VOC'
- title: Pascal VOC XML
description: Popular XML format used for object detection and polygon image segmentation
tasks.
link: https://labelstud.io/guide/export.html#Pascal-VOC-XML
tags:
- image segmentation
- object detection
name: VOC
disabled: true
- title: 'Brush labels to NumPy'
description: 'Export your brush labels as NumPy 2d arrays. Each label outputs as one image.'
link: 'https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG'
tags: ['image segmentation']
name: 'BRUSH_TO_NUMPY'
- title: Brush labels to NumPy
description: Export your brush labels as NumPy 2d arrays. Each label outputs as
one image.
link: https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG
tags:
- image segmentation
name: BRUSH_TO_NUMPY
disabled: true
- title: 'Brush labels to PNG'
description: 'Export your brush labels as PNG images. Each label outputs as one image.'
link: 'https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG'
tags: ['image segmentation']
name: 'BRUSH_TO_PNG'
- title: Brush labels to PNG
description: Export your brush labels as PNG images. Each label outputs as one image.
link: https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG
tags:
- image segmentation
name: BRUSH_TO_PNG
disabled: true
- title: 'ASR Manifest'
description: 'Export audio transcription labels for automatic speech recognition as the JSON manifest format expected by NVIDIA NeMo models.'
link: 'https://labelstud.io/guide/export.html#ASR-MANIFEST'
tags: ['speech recognition']
name: 'ASR_MANIFEST'
- title: ASR Manifest
description: Export audio transcription labels for automatic speech recognition
as the JSON manifest format expected by NVIDIA NeMo models.
link: https://labelstud.io/guide/export.html#ASR-MANIFEST
tags:
- speech recognition
name: ASR_MANIFEST
disabled: true

status_code: 200


Expand Down
Loading

0 comments on commit b38000e

Please sign in to comment.