Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize quality computation #8990

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Fixed

- Improved performance and memory utilization for quality reports in tasks with ellipses and masks
(<https://github.com/cvat-ai/cvat/pull/8990>)
14 changes: 12 additions & 2 deletions cvat/apps/dataset_manager/formats/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,27 @@ def rle(cls, arr: np.ndarray) -> list[int]:

class EllipsesToMasks:
@staticmethod
def convert_ellipse(ellipse, img_h, img_w):
def _convert(ellipse, img_h, img_w):
cx, cy, rightX, topY = ellipse.points
rx = rightX - cx
ry = cy - topY
center = (round(cx), round(cy))
axis = (round(rx), round(ry))
angle = ellipse.rotation
mat = np.zeros((img_h, img_w), dtype=np.uint8)

# TODO: this has bad performance for big masks; try to find a better solution
cv2.ellipse(mat, center, axis, angle, 0, 360, 255, thickness=-1)

rle = mask_utils.encode(np.asfortranarray(mat))
return dm.RleMask(rle=rle, label=ellipse.label, z_order=ellipse.z_order,
return rle

@staticmethod
def convert_ellipse(ellipse, img_h, img_w):
def _lazy_convert():
return EllipsesToMasks._convert(ellipse, img_h, img_w)

return dm.RleMask(rle=_lazy_convert, label=ellipse.label, z_order=ellipse.z_order,
attributes=ellipse.attributes, group=ellipse.group)


Expand Down
20 changes: 14 additions & 6 deletions cvat/apps/dataset_manager/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
import re
import tempfile
import zipfile
from collections.abc import Generator, Sequence
from collections.abc import Generator, Iterable, Sequence
from contextlib import contextmanager
from copy import deepcopy
from datetime import timedelta
from enum import Enum
from threading import Lock
from typing import Any
from typing import Any, TypeVar

import attrs
import django_rq
Expand All @@ -38,18 +38,26 @@ def make_zip_archive(src_path, dst_path):
archive.write(path, osp.relpath(path, src_path))


def bulk_create(db_model, objects, flt_param):
_ModelT = TypeVar("_ModelT", bound=models.Model)

def bulk_create(
db_model: type[_ModelT],
objects: Iterable[_ModelT],
*,
flt_param: dict[str, Any] | None = None,
batch_size: int | None = 10000
) -> list[_ModelT]:
if objects:
if flt_param:
if "postgresql" in settings.DATABASES["default"]["ENGINE"]:
return db_model.objects.bulk_create(objects)
return db_model.objects.bulk_create(objects, batch_size=batch_size)
else:
ids = list(db_model.objects.filter(**flt_param).values_list('id', flat=True))
db_model.objects.bulk_create(objects)
db_model.objects.bulk_create(objects, batch_size=batch_size)

return list(db_model.objects.exclude(id__in=ids).filter(**flt_param))
else:
return db_model.objects.bulk_create(objects)
return db_model.objects.bulk_create(objects, batch_size=batch_size)

return []

Expand Down
22 changes: 13 additions & 9 deletions cvat/apps/quality_control/quality_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -1147,14 +1147,22 @@ def _get_compiled_mask(

from pycocotools import mask as mask_utils

# Merge instance groups
object_rle_groups = [_to_rle(ann, img_h=img_h, img_w=img_w) for ann in anns]
object_rles = [mask_utils.merge(g) for g in object_rle_groups]
object_masks = mask_utils.decode(object_rles)

# Mask materialization can consume a lot of memory,
# so avoid storing all the masks simultaneously
def _make_lazy_decode(i: int):
def _lazy_decode() -> dm.BinaryMaskImage:
return mask_utils.decode([object_rles[i]])[:, :, 0]

return _lazy_decode

return dm.CompiledMask.from_instance_masks(
# need to increment labels and instance ids by 1 to avoid confusion with background
instance_masks=(
dm.Mask(image=object_masks[:, :, i], z_order=ann.z_order, label=ann.label + 1)
dm.Mask(image=_make_lazy_decode(i), z_order=ann.z_order, label=ann.label + 1)
for i, ann in enumerate(anns)
),
instance_ids=(iid + 1 for iid in instance_ids),
Expand Down Expand Up @@ -2529,9 +2537,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models
)
db_job_reports.append(db_job_report)

db_job_reports = bulk_create(
db_model=models.QualityReport, objects=db_job_reports, flt_param={}
)
db_job_reports = bulk_create(db_model=models.QualityReport, objects=db_job_reports)

db_conflicts = []
db_report_iter = itertools.chain([db_task_report], db_job_reports)
Expand All @@ -2546,9 +2552,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models
)
db_conflicts.append(db_conflict)

db_conflicts = bulk_create(
db_model=models.AnnotationConflict, objects=db_conflicts, flt_param={}
)
db_conflicts = bulk_create(db_model=models.AnnotationConflict, objects=db_conflicts)

db_ann_ids = []
db_conflicts_iter = iter(db_conflicts)
Expand All @@ -2564,7 +2568,7 @@ def _save_reports(self, *, task_report: dict, job_reports: list[dict]) -> models
)
db_ann_ids.append(db_ann_id)

db_ann_ids = bulk_create(db_model=models.AnnotationId, objects=db_ann_ids, flt_param={})
db_ann_ids = bulk_create(db_model=models.AnnotationId, objects=db_ann_ids)

return db_task_report

Expand Down
Loading