diff --git a/app/celery_broker/metadata_utils.py b/app/celery_broker/metadata_utils.py index 477873c..44a31a1 100644 --- a/app/celery_broker/metadata_utils.py +++ b/app/celery_broker/metadata_utils.py @@ -87,9 +87,6 @@ def metadata_task( task_name=metadata_type, task=task, ) - crawls.update_status( - crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR - ) continue except Exception as e: logger.error( @@ -102,8 +99,5 @@ def metadata_task( task_name=metadata_type, task=task, ) - crawls.update_status( - crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR - ) continue return handle_metadata_result(task, crawl_process, result, metadata_type) diff --git a/app/celery_broker/tasks.py b/app/celery_broker/tasks.py index 06451fe..3f3dbb0 100644 --- a/app/celery_broker/tasks.py +++ b/app/celery_broker/tasks.py @@ -45,9 +45,6 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess: except Exception as e: logger.error(f"Error while crawling html files: {e}") set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR) - crawls.update_status( - crawl_id=crawl.id, status=ProcessStatus.ERROR - ) self.update_state(state='FAILURE') return crawl_process try: @@ -57,9 +54,6 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess: logger.error(f"Error while uploading html files: {e}") # Html crawl will be considered failed if we can't upload the html files set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR) - crawls.update_status( - crawl_id=crawl.id, status=ProcessStatus.ERROR - ) self.update_state(state='FAILURE') return crawl_process @@ -141,7 +135,7 @@ def finalize_crawl_process(self, crawl_process: Optional[CrawlProcess], crawl: C current_crawl.status = ProcessStatus.ERROR crawls.update_status( - crawl_id=crawl.id, status=current_crawl.status, final_status=True + crawl_id=crawl.id, status=current_crawl.status ) websites.store_last_crawl( diff --git a/app/repositories/crawls.py b/app/repositories/crawls.py index a9cf48d..5f795e1 100644 --- a/app/repositories/crawls.py +++ b/app/repositories/crawls.py @@ -63,14 +63,11 @@ def update(self, data: CrawlModel): }, ) - def update_status(self, crawl_id: str, status: ProcessStatus, final_status: bool = False): + def update_status(self, crawl_id: str, status: ProcessStatus): update_dict = {"status": status} if status == ProcessStatus.STARTED: update_dict["started_at"] = french_datetime() - elif status == ProcessStatus.SUCCESS: - update_dict["finished_at"] = french_datetime() - # In finalize task, we should update the finished_at field regardless of the status - if final_status: + else: update_dict["finished_at"] = french_datetime() self.collection.update_one( filter={"id": crawl_id},