Skip to content

Commit

Permalink
Add duration to logger message in LoggerReporter
Browse files Browse the repository at this point in the history
  • Loading branch information
sbusso committed Mar 25, 2024
1 parent 6bf4adc commit efd898b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
4 changes: 3 additions & 1 deletion scrapework/reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ def report(self, ctx: Context):

class LoggerReporter(Reporter):
    """Reporter that writes a one-line run summary to ``self.logger``."""

    def report(self, ctx: Context):
        """Log how many items were processed and, when known, how long it took.

        Reads ``items_count`` and ``duration`` from ``ctx.collector``.

        Args:
            ctx: Run context whose ``collector`` holds the run metadata.
        """
        items_count = ctx.collector.get("items_count")
        duration = ctx.collector.get("duration")

        # No duration recorded: keep the original short message instead of
        # printing "in Nones".
        # NOTE(review): assumes ``collector.get`` returns None for a missing
        # key — confirm against MetadataCollector.
        if duration is None:
            self.logger.info(f"Processed {items_count} items.")
            return

        # A timedelta renders as "H:MM:SS.ffffff", which reads wrongly with a
        # trailing "s"; convert it to plain seconds. Duck-typed so numeric
        # durations pass through unchanged.
        if hasattr(duration, "total_seconds"):
            duration = f"{duration.total_seconds():.3f}"

        self.logger.info(f"Processed {items_count} items in {duration}s.")


class SlackReporter(Reporter):
Expand Down
18 changes: 15 additions & 3 deletions scrapework/scraper.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
from abc import ABC, abstractmethod
from typing import Any, Callable, ClassVar, Dict, Iterable, List, Optional, Union

Expand All @@ -11,7 +12,7 @@
from scrapework.middleware import RequestMiddleware
from scrapework.module import Module
from scrapework.parsers import HTMLBodyParser
from scrapework.reporter import Reporter
from scrapework.reporter import LoggerReporter, Reporter
from scrapework.request import Request


Expand Down Expand Up @@ -60,8 +61,13 @@ class SpiderConfig(EnvConfig):
class Config:
# NOTE(review): looks like a pydantic-style model config that permits field types without built-in validators — confirm against the EnvConfig base class.
arbitrary_types_allowed = True

def use_modules(self) -> List[Module]:
    """Return the modules that ``configuration`` registers.

    The default set is a single ``LoggerReporter``.
    NOTE(review): presumably meant to be overridden by subclasses to change
    the registered modules — confirm.
    """
    return [LoggerReporter()]

def configuration(self) -> None:
    """Register every module returned by ``use_modules`` through ``use``."""
    for module in self.use_modules():
        self.use(module)

def use(self, module: Module) -> None:
match module:
Expand Down Expand Up @@ -90,6 +96,8 @@ def run(self):
collector=MetadataCollector(),
)
for url in self.start_urls:
begin_time = datetime.datetime.now()

# Load
response = self.make_request(ctx, url)

Expand All @@ -112,6 +120,10 @@ def run(self):
for handler in self.handlers:
handler.process_items(ctx, items)

end_time = datetime.datetime.now()

ctx.collector.set("duration", end_time - begin_time)

for reporter in self.reporters:
reporter.report(ctx)
self.logger.info("Scraping complete")
Expand Down

0 comments on commit efd898b

Please sign in to comment.