diff --git a/changelog.d/20250124_175956_mzhiltso_optimize_quality_computation.md b/changelog.d/20250124_175956_mzhiltso_optimize_quality_computation.md new file mode 100644 index 00000000000..d54592eb0ed --- /dev/null +++ b/changelog.d/20250124_175956_mzhiltso_optimize_quality_computation.md @@ -0,0 +1,4 @@ +### Fixed + +- Improved performance and memory utilization for quality reports in tasks with ellipses and masks + () diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py index 266e738e769..eacca23fa92 100644 --- a/cvat/apps/dataset_manager/formats/transformations.py +++ b/cvat/apps/dataset_manager/formats/transformations.py @@ -104,7 +104,7 @@ def rle(cls, arr: np.ndarray) -> list[int]: class EllipsesToMasks: @staticmethod - def convert_ellipse(ellipse, img_h, img_w): + def _convert(ellipse, img_h, img_w): cx, cy, rightX, topY = ellipse.points rx = rightX - cx ry = cy - topY @@ -112,9 +112,19 @@ def convert_ellipse(ellipse, img_h, img_w): axis = (round(rx), round(ry)) angle = ellipse.rotation mat = np.zeros((img_h, img_w), dtype=np.uint8) + + # TODO: has bad performance for big masks, try to find a better solution cv2.ellipse(mat, center, axis, angle, 0, 360, 255, thickness=-1) + rle = mask_utils.encode(np.asfortranarray(mat)) - return dm.RleMask(rle=rle, label=ellipse.label, z_order=ellipse.z_order, + return rle + + @staticmethod + def convert_ellipse(ellipse, img_h, img_w): + def _lazy_convert(): + return EllipsesToMasks._convert(ellipse, img_h, img_w) + + return dm.RleMask(rle=_lazy_convert, label=ellipse.label, z_order=ellipse.z_order, attributes=ellipse.attributes, group=ellipse.group) diff --git a/cvat/apps/dataset_manager/util.py b/cvat/apps/dataset_manager/util.py index 619ea5c8c49..c8829280978 100644 --- a/cvat/apps/dataset_manager/util.py +++ b/cvat/apps/dataset_manager/util.py @@ -9,13 +9,13 @@ import re import tempfile import zipfile -from collections.abc import Generator, Sequence +from collections.abc import Generator, Iterable, Sequence from contextlib import contextmanager from copy import deepcopy from datetime import timedelta from enum import Enum from threading import Lock -from typing import Any +from typing import Any, TypeVar import attrs import django_rq @@ -38,18 +38,26 @@ def make_zip_archive(src_path, dst_path): archive.write(path, osp.relpath(path, src_path)) -def bulk_create(db_model, objects, flt_param): +_ModelT = TypeVar("_ModelT", bound=models.Model) + +def bulk_create( + db_model: type[_ModelT], + objects: Iterable[_ModelT], + *, + flt_param: dict[str, Any] | None = None, + batch_size: int | None = 10000 +) -> list[_ModelT]: if objects: if flt_param: if "postgresql" in settings.DATABASES["default"]["ENGINE"]: - return db_model.objects.bulk_create(objects) + return db_model.objects.bulk_create(objects, batch_size=batch_size) else: ids = list(db_model.objects.filter(**flt_param).values_list('id', flat=True)) - db_model.objects.bulk_create(objects) + db_model.objects.bulk_create(objects, batch_size=batch_size) return list(db_model.objects.exclude(id__in=ids).filter(**flt_param)) else: - return db_model.objects.bulk_create(objects) + return db_model.objects.bulk_create(objects, batch_size=batch_size) return [] diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index ed1fddc9e74..45b11626ad6 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -1147,14 +1147,22 @@ def _get_compiled_mask( from pycocotools import mask as mask_utils + # Merge instance groups object_rle_groups = [_to_rle(ann, img_h=img_h, img_w=img_w) for ann in anns] object_rles = [mask_utils.merge(g) for g in object_rle_groups] - object_masks = mask_utils.decode(object_rles) + + # Mask materialization can consume a lot of memory, + # avoid storing all the masks simultaneously + def _make_lazy_decode(i: int): + def _lazy_decode() -> dm.BinaryMaskImage: + return mask_utils.decode([object_rles[i]])[:, :, 0] + + return _lazy_decode return dm.CompiledMask.from_instance_masks( # need to increment labels and instance ids by 1 to avoid confusion with background instance_masks=( - dm.Mask(image=object_masks[:, :, i], z_order=ann.z_order, label=ann.label + 1) + dm.Mask(image=_make_lazy_decode(i), z_order=ann.z_order, label=ann.label + 1) for i, ann in enumerate(anns) ), instance_ids=(iid + 1 for iid in instance_ids), @@ -2529,9 +2537,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models ) db_job_reports.append(db_job_report) - db_job_reports = bulk_create( - db_model=models.QualityReport, objects=db_job_reports, flt_param={} - ) + db_job_reports = bulk_create(db_model=models.QualityReport, objects=db_job_reports) db_conflicts = [] db_report_iter = itertools.chain([db_task_report], db_job_reports) @@ -2546,9 +2552,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models ) db_conflicts.append(db_conflict) - db_conflicts = bulk_create( - db_model=models.AnnotationConflict, objects=db_conflicts, flt_param={} - ) + db_conflicts = bulk_create(db_model=models.AnnotationConflict, objects=db_conflicts) db_ann_ids = [] db_conflicts_iter = iter(db_conflicts) @@ -2564,7 +2568,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models ) db_ann_ids.append(db_ann_id) - db_ann_ids = bulk_create(db_model=models.AnnotationId, objects=db_ann_ids, flt_param={}) + db_ann_ids = bulk_create(db_model=models.AnnotationId, objects=db_ann_ids) return db_task_report