Skip to content

Commit

Permalink
Merge remote-tracking branch 'mensr/lighthouse74' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Pandalei97 committed Nov 13, 2023
2 parents f879676 + 5940272 commit 66535c8
Show file tree
Hide file tree
Showing 36 changed files with 259 additions and 357 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/staging.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ jobs:
steps:
- uses: actions/checkout@v3

- name : Install Packages
run : pip install -r requirements.txt

- name: test
run: python -m unittest

Expand Down
17 changes: 17 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: Testing deployment

on:
push:

jobs:
unit-test:
name: run unit tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

- name : Install Packages
run : pip install -r requirements.txt

- name: test
run: python -m unittest
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ This endpoint allows you to create a new website configuration and execute a cra
| `depth` | `integer` | Maximum depth to crawl (**Default**: 2) |
| `limit` | `integer` | Maximum pages to crawl (**Default**: 400) |
| `headers` | `dict[str, str]` | Headers that will be passed to all crawl requests (**Default**: {})|
| `accessibility` | `MetadataConfig` | Accessibility configuration (**Default**: {'enabled':True, 'depth' 0}) |
| `good_practices` | `MetadataConfig` | Good Practices configuration (**Default**: {'enabled': False}) |
| `lighthouse` | `MetadataConfig` | Lighthouse configuration (**Default**: {'enabled': True, 'depth': 0}) |
| `technologies` | `MetadataConfig` | Technologies configuration (**Default**: {'enabled': False}) |
| `responsiveness` | `MetadataConfig` | Responsiveness configuration (**Default**: {'enabled': False}) |
| `carbon_footprint` | `MetadataConfig` | Carbon Footprint configuration (**Default**: {'enabled': False}) |
Expand Down
24 changes: 7 additions & 17 deletions app/celery_broker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@

class BaseConfig:
CRAWL_QUEUE_NAME = "crawl_queue"
ACCESSIBILITY_QUEUE_NAME = "accessibility_queue"
LIGHTHOUSE_QUEUE_NAME = "lighthouse_queue"
TECHNOLOGIES_QUEUE_NAME = "technologies_queue"
GOOD_PRACTICES_QUEUE_NAME = "good_practices_queue"
RESPONSIVENESS_QUEUE_NAME = "responsiveness_queue"
CARBON_QUEUE_NAME = "carbon_footprint_queue"
UPLOAD_QUEUE_NAME = "upload_queue"
Expand All @@ -29,20 +28,15 @@ class BaseConfig:
routing_key=CRAWL_QUEUE_NAME,
),
Queue(
ACCESSIBILITY_QUEUE_NAME,
Exchange(ACCESSIBILITY_QUEUE_NAME),
routing_key=ACCESSIBILITY_QUEUE_NAME,
LIGHTHOUSE_QUEUE_NAME,
Exchange(LIGHTHOUSE_QUEUE_NAME),
routing_key=LIGHTHOUSE_QUEUE_NAME,
),
Queue(
TECHNOLOGIES_QUEUE_NAME,
Exchange(TECHNOLOGIES_QUEUE_NAME),
routing_key=TECHNOLOGIES_QUEUE_NAME,
),
Queue(
GOOD_PRACTICES_QUEUE_NAME,
Exchange(GOOD_PRACTICES_QUEUE_NAME),
routing_key=GOOD_PRACTICES_QUEUE_NAME,
),
Queue(
RESPONSIVENESS_QUEUE_NAME,
Exchange(RESPONSIVENESS_QUEUE_NAME),
Expand All @@ -62,18 +56,14 @@ class BaseConfig:

task_routes = {
"crawl": {"queue": CRAWL_QUEUE_NAME, "routing_key": CRAWL_QUEUE_NAME},
"get_accessibility": {
"queue": ACCESSIBILITY_QUEUE_NAME,
"routing_key": ACCESSIBILITY_QUEUE_NAME,
"get_lighthouse": {
"queue": LIGHTHOUSE_QUEUE_NAME,
"routing_key": LIGHTHOUSE_QUEUE_NAME,
},
"get_technologies": {
"queue": TECHNOLOGIES_QUEUE_NAME,
"routing_key": TECHNOLOGIES_QUEUE_NAME,
},
"get_good_practices": {
"queue": GOOD_PRACTICES_QUEUE_NAME,
"routing_key": GOOD_PRACTICES_QUEUE_NAME,
},
"get_responsiveness": {
"queue": RESPONSIVENESS_QUEUE_NAME,
"routing_key": RESPONSIVENESS_QUEUE_NAME,
Expand Down
8 changes: 2 additions & 6 deletions app/celery_broker/metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
from app.models.enums import MetadataType, ProcessStatus
from app.models.metadata import MetadataTask
from app.models.process import CrawlProcess
from app.services.accessibility_best_practices_calculator import (
AccessibilityError,
BestPracticesError,
)
from app.services.lighthouse_calculator import LighthouseError
from app.services.carbon_calculator import CarbonCalculatorError
from app.services.crawler_logger import logger
from app.services.responsiveness_calculator import ResponsivenessCalculatorError
Expand Down Expand Up @@ -76,8 +73,7 @@ def metadata_task(
data = calc_method(url)
result[url] = data
except (
AccessibilityError,
BestPracticesError,
LighthouseError,
TechnologiesError,
ResponsivenessCalculatorError,
CarbonCalculatorError,
Expand Down
28 changes: 8 additions & 20 deletions app/celery_broker/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
from app.models.enums import MetadataType, ProcessStatus
from app.models.metadata import MetadataTask
from app.models.process import CrawlProcess
from app.services.accessibility_best_practices_calculator import (
LighthouseWrapper,
from app.services.lighthouse_calculator import (
LighthouseCalculator,
)
from app.services.carbon_calculator import CarbonCalculator
from app.services.crawler_logger import logger
Expand Down Expand Up @@ -77,14 +77,14 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess:
return crawl_process


@celery_app.task(bind=True, name="get_accessibility")
def get_accessibility(self, crawl_process: CrawlProcess):
@celery_app.task(bind=True, name="get_lighthouse")
def get_lighthouse(self, crawl_process: CrawlProcess):
return metadata_task(
task=MetadataTask(task_id=self.request.id),
crawl_process=crawl_process,
metadata_type=MetadataType.ACCESSIBILITY,
calculator=LighthouseWrapper(),
method_name="get_accessibility",
metadata_type=MetadataType.LIGHTHOUSE,
calculator=LighthouseCalculator(),
method_name="get_lighthouse",
)


Expand All @@ -99,17 +99,6 @@ def get_technologies(self, crawl_process: CrawlProcess):
)


@celery_app.task(bind=True, name="get_good_practices")
def get_good_practices(self, crawl_process: CrawlProcess):
return metadata_task(
task=MetadataTask(task_id=self.request.id),
crawl_process=crawl_process,
metadata_type=MetadataType.GOOD_PRACTICES,
calculator=LighthouseWrapper(),
method_name="get_best_practices",
)


@celery_app.task(bind=True, name="get_responsiveness")
def get_responsiveness(self, crawl_process: CrawlProcess):
return metadata_task(
Expand All @@ -133,9 +122,8 @@ def get_carbon_footprint(self, crawl_process: CrawlProcess):


METADATA_TASK_REGISTRY = {
MetadataType.ACCESSIBILITY: get_accessibility,
MetadataType.LIGHTHOUSE: get_lighthouse,
MetadataType.TECHNOLOGIES: get_technologies,
MetadataType.GOOD_PRACTICES: get_good_practices,
MetadataType.RESPONSIVENESS: get_responsiveness,
MetadataType.CARBON_FOOTPRINT: get_carbon_footprint,
}
Expand Down
11 changes: 4 additions & 7 deletions app/models/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from app.celery_broker.utils import french_datetime
from app.models.enums import MetadataType, ProcessStatus
from app.models.metadata import MetadataConfig, AccessibilityModel, MetadataTask
from app.models.metadata import MetadataConfig, LighthouseModel, MetadataTask
from app.models.utils import get_uuid, BaseTaskModel


Expand All @@ -31,10 +31,9 @@ class CrawlModel(BaseModel):
finished_at: datetime | None = None
status: ProcessStatus = ProcessStatus.PENDING
html_crawl: BaseTaskModel = Field(default_factory=BaseTaskModel)
accessibility: AccessibilityModel | None = None
lighthouse: LighthouseModel | None = None
technologies_and_trackers: MetadataTask | None = None
responsiveness: MetadataTask | None = None
good_practices: MetadataTask | None = None
carbon_footprint: MetadataTask | None = None
uploads: BaseTaskModel = Field(default_factory=BaseTaskModel)

Expand All @@ -47,14 +46,12 @@ def enabled_metadata(self) -> list[MetadataType]:
]

def init_tasks(self) -> None:
if MetadataType.ACCESSIBILITY in self.enabled_metadata:
self.accessibility = AccessibilityModel()
if MetadataType.LIGHTHOUSE in self.enabled_metadata:
self.lighthouse = LighthouseModel()
if MetadataType.TECHNOLOGIES in self.enabled_metadata:
self.technologies_and_trackers = MetadataTask()
if MetadataType.RESPONSIVENESS in self.enabled_metadata:
self.responsiveness = MetadataTask()
if MetadataType.GOOD_PRACTICES in self.enabled_metadata:
self.good_practices = MetadataTask()
if MetadataType.CARBON_FOOTPRINT in self.enabled_metadata:
self.carbon_footprint = MetadataTask()

Expand Down
3 changes: 1 addition & 2 deletions app/models/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@


class MetadataType(StrEnum):
ACCESSIBILITY = "accessibility"
LIGHTHOUSE = "lighthouse"
TECHNOLOGIES = "technologies_and_trackers"
RESPONSIVENESS = "responsiveness"
GOOD_PRACTICES = "good_practices"
CARBON_FOOTPRINT = "carbon_footprint"


Expand Down
2 changes: 1 addition & 1 deletion app/models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ class MetadataTask(BaseTaskModel):
pass


class AccessibilityModel(MetadataTask):
class LighthouseModel(MetadataTask):
score: float | None = None
8 changes: 2 additions & 6 deletions app/models/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@
class UpdateWebsiteRequest(BaseModel):
depth: int | None = None
limit: int | None = None
accessibility: MetadataConfig | None = None
lighthouse: MetadataConfig | None = None
technologies_and_trackers: MetadataConfig | None = None
responsiveness: MetadataConfig | None = None
good_practices: MetadataConfig | None = None
carbon_footprint: MetadataConfig | None = None
headers: dict[str, Any] | None = None
tags: list[str] | None = None
Expand All @@ -26,16 +25,13 @@ class CreateWebsiteRequest(BaseModel):
url: str
depth: int = Field(ge=0, default=2)
limit: int = Field(ge=0, default=400)
accessibility: MetadataConfig = Field(default=MetadataConfig())
lighthouse: MetadataConfig = Field(default=MetadataConfig())
technologies_and_trackers: MetadataConfig = Field(
default=MetadataConfig(enabled=False)
)
responsiveness: MetadataConfig = Field(
default=MetadataConfig(enabled=False)
)
good_practices: MetadataConfig = Field(
default=MetadataConfig(enabled=False)
)
carbon_footprint: MetadataConfig = Field(
default=MetadataConfig(enabled=False)
)
Expand Down
6 changes: 2 additions & 4 deletions app/models/website.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@ class WebsiteModel(BaseModel):
url: str
depth: int
limit: int
accessibility: MetadataConfig
lighthouse: MetadataConfig
technologies_and_trackers: MetadataConfig
responsiveness: MetadataConfig
good_practices: MetadataConfig
carbon_footprint: MetadataConfig
headers: dict[str, Any]
created_at: datetime = Field(default_factory=french_datetime)
Expand All @@ -36,10 +35,9 @@ def to_config(self) -> CrawlConfig:
url=self.url,
parameters=CrawlParameters(depth=self.depth, limit=self.limit),
metadata_config={
MetadataType.ACCESSIBILITY: self.accessibility,
MetadataType.LIGHTHOUSE: self.lighthouse,
MetadataType.TECHNOLOGIES: self.technologies_and_trackers,
MetadataType.RESPONSIVENESS: self.responsiveness,
MetadataType.GOOD_PRACTICES: self.good_practices,
MetadataType.CARBON_FOOTPRINT: self.carbon_footprint,
},
headers=self.headers,
Expand Down
67 changes: 0 additions & 67 deletions app/services/accessibility_best_practices_calculator.py

This file was deleted.

32 changes: 32 additions & 0 deletions app/services/lighthouse_calculator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import json
import subprocess
from typing import Any


class LighthouseError(Exception):
    """Raised when a Lighthouse run fails or its output cannot be parsed."""


class LighthouseCalculator:
    """Runs the ``lighthouse`` command-line tool and returns its JSON report."""

    def get_lighthouse(self, url: str) -> dict[str, Any]:
        """Audit ``url`` with the Lighthouse CLI and return the parsed report.

        The CLI is asked for JSON output (``--output=json``); whatever it
        writes to stdout is decoded with ``json.loads`` and returned as-is.

        Args:
            url: Address of the page to audit. It is handed to the process
                as a single argument and is never interpreted by a shell.

        Returns:
            The decoded JSON document printed by Lighthouse.

        Raises:
            LighthouseError: If the process cannot be started or its stdout
                is not valid JSON. The original exception is chained as
                ``__cause__``.
        """
        # An argument list (no shell) keeps shell metacharacters in a crawled
        # URL from being executed as commands.
        command = [
            "lighthouse",
            url,
            # Must stay ONE argv entry: Lighthouse splits the flag value
            # itself. No literal quotes are needed because no shell is
            # involved to strip them.
            "--chrome-flags=--no-sandbox --headless --disable-dev-shm-usage",
            "--output=json",
            "--disable-full-page-screenshot",
            "--no-enable-error-reporting",
            "--quiet",
        ]
        try:
            lighthouse_process = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                check=False,
            )
            return json.loads(lighthouse_process.stdout)
        except Exception as e:
            # Deliberately broad: callers only handle LighthouseError, so any
            # failure (missing binary, empty or invalid output, bad argument
            # type) is reported through that single exception type.
            raise LighthouseError(f"Lighthouse audit failed for {url!r}") from e
Loading

0 comments on commit 66535c8

Please sign in to comment.