
Commit

fix: only calculate crawl status at the beginning and the end of the crawl
Pandalei97 committed Dec 14, 2023
1 parent b58e64b commit 055fff6
Showing 3 changed files with 3 additions and 18 deletions.
6 changes: 0 additions & 6 deletions app/celery_broker/metadata_utils.py
@@ -87,9 +87,6 @@ def metadata_task(
                 task_name=metadata_type,
                 task=task,
             )
-            crawls.update_status(
-                crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR
-            )
             continue
         except Exception as e:
             logger.error(
@@ -102,8 +99,5 @@ def metadata_task(
                 task_name=metadata_type,
                 task=task,
             )
-            crawls.update_status(
-                crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR
-            )
             continue
     return handle_metadata_result(task, crawl_process, result, metadata_type)
8 changes: 1 addition & 7 deletions app/celery_broker/tasks.py
@@ -45,9 +45,6 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess:
     except Exception as e:
         logger.error(f"Error while crawling html files: {e}")
         set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR)
-        crawls.update_status(
-            crawl_id=crawl.id, status=ProcessStatus.ERROR
-        )
         self.update_state(state='FAILURE')
         return crawl_process
     try:
@@ -57,9 +54,6 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess:
         logger.error(f"Error while uploading html files: {e}")
         # Html crawl will be considered failed if we can't upload the html files
         set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR)
-        crawls.update_status(
-            crawl_id=crawl.id, status=ProcessStatus.ERROR
-        )
         self.update_state(state='FAILURE')
         return crawl_process
 
@@ -141,7 +135,7 @@ def finalize_crawl_process(self, crawl_process: Optional[CrawlProcess], crawl: C
         current_crawl.status = ProcessStatus.ERROR
 
     crawls.update_status(
-        crawl_id=crawl.id, status=current_crawl.status, final_status=True
+        crawl_id=crawl.id, status=current_crawl.status
    )
 
     websites.store_last_crawl(
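
Taken together, the tasks.py changes mean the crawl-level status is written only twice: once when the crawl is launched and once in finalize_crawl_process; failed sub-tasks now only mark their own task status. A minimal runnable sketch of that lifecycle, using stand-ins for illustration (the ProcessStatus values mirror the enum referenced in the diff, FakeCrawls and the crawl id are hypothetical, and the STARTED call site is assumed since it is not part of this diff):

from enum import Enum


class ProcessStatus(str, Enum):  # stand-in for the real enum used in the diff
    STARTED = "started"
    SUCCESS = "success"
    ERROR = "error"
    PARTIAL_ERROR = "partial_error"


class FakeCrawls:  # stand-in for the crawls repository in app/repositories/crawls.py
    def update_status(self, crawl_id: str, status: ProcessStatus):
        print(f"crawl {crawl_id} -> {status.value}")


crawls = FakeCrawls()

# 1) At the beginning of the crawl (call site assumed, not shown in this diff):
crawls.update_status(crawl_id="abc123", status=ProcessStatus.STARTED)

# 2) Individual task failures now only mark their own task
#    (e.g. set_html_crawl_status) and no longer touch the crawl status.

# 3) At the end, finalize_crawl_process computes the overall status once:
crawls.update_status(crawl_id="abc123", status=ProcessStatus.ERROR)
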
7 changes: 2 additions & 5 deletions app/repositories/crawls.py
@@ -63,14 +63,11 @@ def update(self, data: CrawlModel):
             },
         )
 
-    def update_status(self, crawl_id: str, status: ProcessStatus, final_status: bool = False):
+    def update_status(self, crawl_id: str, status: ProcessStatus):
         update_dict = {"status": status}
         if status == ProcessStatus.STARTED:
             update_dict["started_at"] = french_datetime()
-        elif status == ProcessStatus.SUCCESS:
-            update_dict["finished_at"] = french_datetime()
-        # In finalize task, we should update the finished_at field regardless of the status
-        if final_status:
+        else:
             update_dict["finished_at"] = french_datetime()
         self.collection.update_one(
             filter={"id": crawl_id},
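
For reference, after this change update_status should read roughly as follows (a sketch reassembled from the hunk above; the update argument passed to update_one is assumed, since the diff is cut off after the filter line):

def update_status(self, crawl_id: str, status: ProcessStatus):
    update_dict = {"status": status}
    if status == ProcessStatus.STARTED:
        # The crawl is starting: stamp its start time.
        update_dict["started_at"] = french_datetime()
    else:
        # Any other status is now only written at the end of the crawl,
        # so always stamp the finish time.
        update_dict["finished_at"] = french_datetime()
    self.collection.update_one(
        filter={"id": crawl_id},
        update={"$set": update_dict},  # assumed: the diff truncates here
    )

The final_status flag is no longer needed because intermediate tasks no longer write crawl-level statuses; any non-STARTED status reaching this method is, by construction, the final one.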
