From 66ca059dae182a7b3e169748d0bc0ff718a00eb1 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Mon, 2 Sep 2024 09:58:56 +0200 Subject: [PATCH] Improve typing of `LanguageProcessor`s (#666) --- services/report/languages/__init__.py | 29 --- services/report/languages/base.py | 13 +- services/report/languages/bullseye.py | 10 +- services/report/languages/clover.py | 15 +- services/report/languages/cobertura.py | 20 +- services/report/languages/coveralls.py | 20 +- services/report/languages/csharp.py | 19 +- services/report/languages/dlst.py | 11 +- services/report/languages/elm.py | 13 +- services/report/languages/flowcover.py | 13 +- services/report/languages/gap.py | 34 ++- services/report/languages/gcov.py | 13 +- services/report/languages/go.py | 6 +- services/report/languages/jacoco.py | 13 +- services/report/languages/jetbrainsxml.py | 15 +- services/report/languages/lcov.py | 16 +- services/report/languages/lua.py | 13 +- services/report/languages/mono.py | 15 +- services/report/languages/node.py | 19 +- services/report/languages/pycoverage.py | 12 +- services/report/languages/rlang.py | 16 +- services/report/languages/salesforce.py | 13 +- services/report/languages/scala.py | 10 +- services/report/languages/scoverage.py | 15 +- services/report/languages/simplecov.py | 10 +- services/report/languages/tests/test_base.py | 7 - .../languages/tests/unit/test_cobertura.py | 2 - .../languages/tests/unit/test_coveralls.py | 5 +- .../report/languages/tests/unit/test_gap.py | 14 +- .../report/languages/tests/unit/test_gcov.py | 9 +- .../report/languages/tests/unit/test_lcov.py | 9 +- .../report/languages/tests/unit/test_lua.py | 7 +- services/report/languages/v1.py | 10 +- services/report/languages/vb.py | 18 +- services/report/languages/vb2.py | 18 +- services/report/languages/xcode.py | 11 +- services/report/languages/xcodeplist.py | 15 +- services/report/report_builder.py | 7 +- services/report/report_processor.py | 236 ++++++++++-------- services/report/tests/unit/test_process.py | 40 +-- .../tests/unit/test_report_processor.py | 32 ++- 41 files changed, 408 insertions(+), 415 deletions(-) delete mode 100644 services/report/languages/tests/test_base.py diff --git a/services/report/languages/__init__.py b/services/report/languages/__init__.py index 61830735c..e69de29bb 100644 --- a/services/report/languages/__init__.py +++ b/services/report/languages/__init__.py @@ -1,29 +0,0 @@ -# ruff: noqa: F401 -from services.report.languages.bullseye import BullseyeProcessor -from services.report.languages.clover import CloverProcessor -from services.report.languages.cobertura import CoberturaProcessor -from services.report.languages.coveralls import CoverallsProcessor -from services.report.languages.csharp import CSharpProcessor -from services.report.languages.dlst import DLSTProcessor -from services.report.languages.elm import ElmProcessor -from services.report.languages.flowcover import FlowcoverProcessor -from services.report.languages.gap import GapProcessor -from services.report.languages.gcov import GcovProcessor -from services.report.languages.go import GoProcessor -from services.report.languages.jacoco import JacocoProcessor -from services.report.languages.jetbrainsxml import JetBrainsXMLProcessor -from services.report.languages.lcov import LcovProcessor -from services.report.languages.lua import LuaProcessor -from services.report.languages.mono import MonoProcessor -from services.report.languages.node import NodeProcessor -from services.report.languages.pycoverage import PyCoverageProcessor -from services.report.languages.rlang import RlangProcessor -from services.report.languages.salesforce import SalesforceProcessor -from services.report.languages.scala import ScalaProcessor -from services.report.languages.scoverage import SCoverageProcessor -from services.report.languages.simplecov import SimplecovProcessor -from services.report.languages.v1 import VOneProcessor -from services.report.languages.vb import VbProcessor -from services.report.languages.vb2 import VbTwoProcessor -from services.report.languages.xcode import XCodeProcessor -from services.report.languages.xcodeplist import XCodePlistProcessor diff --git a/services/report/languages/base.py b/services/report/languages/base.py index 38e13380e..06c0cc181 100644 --- a/services/report/languages/base.py +++ b/services/report/languages/base.py @@ -1,4 +1,3 @@ -import typing from typing import Any from shared.reports.resources import Report @@ -7,10 +6,6 @@ class BaseLanguageProcessor(object): - @property - def name(self): - return self.get_processor_name() - def __init__(self, *args, **kwargs) -> None: pass @@ -37,9 +32,7 @@ def matches_content(self, content: Any, first_line: str, name: str) -> bool: """ pass - def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder - ) -> Report: + def process(self, name: str, content: Any, report_builder: ReportBuilder) -> Report: """Processes a report uploaded by the user, returning a `Report` This is the base function which we need to implement @@ -61,7 +54,3 @@ def process( ReportExpiredException: If the report is considered expired """ pass - - @classmethod - def get_processor_name(cls) -> str: - return cls.__name__ diff --git a/services/report/languages/bullseye.py b/services/report/languages/bullseye.py index d14d3a31a..6bb30b631 100644 --- a/services/report/languages/bullseye.py +++ b/services/report/languages/bullseye.py @@ -1,6 +1,6 @@ -import typing from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report from timestring import Date @@ -18,17 +18,17 @@ class BullseyeProcessor(BaseLanguageProcessor): def matches_content(self, content: Element, first_line: str, name: str) -> bool: return "BullseyeCoverage" in content.tag + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: return from_xml(content, report_builder.create_report_builder_session(name)) -def from_xml(xml, report_builder_session: ReportBuilderSession): - path_fixer, ignored_lines, sessionid, yaml = ( +def from_xml(xml: Element, report_builder_session: ReportBuilderSession): + path_fixer, ignored_lines, yaml = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, report_builder_session.current_yaml, ) if read_yaml_field(yaml, ("codecov", "max_report_age"), "12h ago"): diff --git a/services/report/languages/clover.py b/services/report/languages/clover.py index f83d0f925..1ec387ebc 100644 --- a/services/report/languages/clover.py +++ b/services/report/languages/clover.py @@ -1,5 +1,6 @@ -import typing +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report from timestring import Date @@ -14,11 +15,12 @@ class CloverProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "coverage" and content.attrib.get("generated")) + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "coverage" and bool(content.attrib.get("generated")) + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_xml(content, report_builder_session) @@ -38,11 +40,10 @@ def get_end_of_file(filename, xmlfile): pass -def from_xml(xml, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid, yaml = ( +def from_xml(xml: Element, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines, yaml = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, report_builder_session.current_yaml, ) diff --git a/services/report/languages/cobertura.py b/services/report/languages/cobertura.py index 6db581e23..765783e1f 100644 --- a/services/report/languages/cobertura.py +++ b/services/report/languages/cobertura.py @@ -1,8 +1,9 @@ import logging import re -import typing from typing import List +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report from timestring import Date, TimestringInvalid @@ -19,13 +20,15 @@ class CoberturaProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - if bool(list(content.iter("coverage"))): - return True - return bool(list(content.iter("scoverage"))) + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return bool( + next(content.iter("coverage"), None) + or next(content.iter("scoverage"), None) + ) + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_xml(content, report_builder_session) @@ -43,11 +46,10 @@ def get_sources_to_attempt(xml) -> List[str]: return [s for s in sources if isinstance(s, str) and s.startswith("/")] -def from_xml(xml, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid, repo_yaml = ( +def from_xml(xml: Element, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines, repo_yaml = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, report_builder_session.current_yaml, ) diff --git a/services/report/languages/coveralls.py b/services/report/languages/coveralls.py index f8c049e30..09784adda 100644 --- a/services/report/languages/coveralls.py +++ b/services/report/languages/coveralls.py @@ -1,5 +1,4 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -11,26 +10,21 @@ class CoverallsProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return detect(content) + def matches_content(self, content: dict, first_line: str, name: str) -> bool: + return "source_files" in content + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: return from_json(content, report_builder.create_report_builder_session(name)) -def detect(report): - return "source_files" in report - - -def from_json(report, report_builder_session: ReportBuilderSession) -> Report: +def from_json(report: dict, report_builder_session: ReportBuilderSession) -> Report: # https://github.com/codecov/support/issues/253 - path_fixer, ignored_lines, sessionid, repo_yaml = ( + path_fixer, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, - report_builder_session.current_yaml, ) for _file in report["source_files"]: filename = path_fixer(_file["name"]) diff --git a/services/report/languages/csharp.py b/services/report/languages/csharp.py index a438cbb2c..cb31c9646 100644 --- a/services/report/languages/csharp.py +++ b/services/report/languages/csharp.py @@ -1,7 +1,8 @@ -import typing from collections import defaultdict from itertools import repeat +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -13,11 +14,12 @@ class CSharpProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "CoverageSession") + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "CoverageSession" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_xml(content, report_builder_session) @@ -38,7 +40,7 @@ def _build_branches(branch_gen): return branches -def from_xml(xml, report_builder_session: ReportBuilderSession) -> Report: +def from_xml(xml: Element, report_builder_session: ReportBuilderSession) -> Report: """ https://github.com/OpenCover/opencover/issues/293#issuecomment-94598145 @sl - start line @@ -50,13 +52,10 @@ def from_xml(xml, report_builder_session: ReportBuilderSession) -> Report: @vc - statement executed """ - ignored_lines, sessionid = ( - report_builder_session.ignored_lines, - report_builder_session.sessionid, - ) + ignored_lines = report_builder_session.ignored_lines + # dict of {"fileid": "path"} file_by_id = {} - file_by_id_get = file_by_id.get file_by_name = {None: None} for f in xml.iter("File"): filename = report_builder_session.path_fixer( diff --git a/services/report/languages/dlst.py b/services/report/languages/dlst.py index 78606310e..4f45f5fa8 100644 --- a/services/report/languages/dlst.py +++ b/services/report/languages/dlst.py @@ -1,6 +1,6 @@ -import typing from io import BytesIO +import sentry_sdk from shared.reports.resources import Report from shared.reports.types import ReportLine @@ -12,16 +12,17 @@ class DLSTProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content[-7:] == b"covered") + def matches_content(self, content: bytes, first_line: str, name: str) -> bool: + return content[-7:] == b"covered" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: bytes, report_builder: ReportBuilder ) -> Report: return from_string(content, report_builder.create_report_builder_session(name)) -def from_string(string, report_builder_session: ReportBuilderSession) -> Report: +def from_string(string: bytes, report_builder_session: ReportBuilderSession) -> Report: path_fixer, ignored_lines, sessionid, filename = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, diff --git a/services/report/languages/elm.py b/services/report/languages/elm.py index 43c29b301..f00ee08ef 100644 --- a/services/report/languages/elm.py +++ b/services/report/languages/elm.py @@ -1,5 +1,4 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -11,21 +10,21 @@ class ElmProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: dict, first_line: str, name: str) -> bool: return isinstance(content, dict) and bool(content.get("coverageData")) + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_json(content, report_builder_session) -def from_json(json, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid = ( +def from_json(json: dict, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, ) for name, data in json["coverageData"].items(): fn = path_fixer(json["moduleMap"][name]) diff --git a/services/report/languages/flowcover.py b/services/report/languages/flowcover.py index 111630c56..1ae0de9bc 100644 --- a/services/report/languages/flowcover.py +++ b/services/report/languages/flowcover.py @@ -1,5 +1,4 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -11,11 +10,12 @@ class FlowcoverProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: dict, first_line: str, name: str) -> bool: return isinstance(content, dict) and bool(content.get("flowStatus")) + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session( filepath=name @@ -23,11 +23,10 @@ def process( return from_json(content, report_builder_session) -def from_json(json, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid = ( +def from_json(json: dict, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, ) for fn, data in json["files"].items(): diff --git a/services/report/languages/gap.py b/services/report/languages/gap.py index a1dbdd47a..e2d116b07 100644 --- a/services/report/languages/gap.py +++ b/services/report/languages/gap.py @@ -2,42 +2,40 @@ from io import BytesIO from json import dumps, loads +import sentry_sdk from shared.reports.resources import Report, ReportFile from shared.reports.types import ReportLine +from services.path_fixer import PathFixer from services.report.languages.base import BaseLanguageProcessor from services.report.report_builder import ReportBuilder class GapProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return detect(first_line) + def matches_content(self, content: typing.Any, first_line: str, name: str) -> bool: + try: + val = content if isinstance(content, dict) else loads(first_line) + return "Type" in val and "File" in val + except (TypeError, ValueError): + return False + @sentry_sdk.trace def process( self, name: str, content: typing.Any, report_builder: ReportBuilder ) -> Report: - path_fixer, ignored_lines, sessionid, repo_yaml = ( - report_builder.path_fixer, - report_builder.ignored_lines, - report_builder.sessionid, - report_builder.repo_yaml, - ) if isinstance(content, dict): content = dumps(content) if isinstance(content, str): content = content.encode() - return from_string(content, path_fixer, ignored_lines, sessionid) - - -def detect(string: bytes): - try: - val = loads(string) - return "Type" in val and "File" in val - except (TypeError, ValueError): - return False + return from_string( + content, + report_builder.path_fixer, + report_builder.ignored_lines, + report_builder.sessionid, + ) -def from_string(string, fix, ignored_lines, sessionid): +def from_string(string: bytes, fix: PathFixer, ignored_lines: dict, sessionid: int): # https://github.com/codecov/support/issues/253 report = Report() _file = None diff --git a/services/report/languages/gcov.py b/services/report/languages/gcov.py index be88a4a32..6e6049441 100644 --- a/services/report/languages/gcov.py +++ b/services/report/languages/gcov.py @@ -1,8 +1,8 @@ import re -import typing from collections import defaultdict from io import BytesIO +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -15,11 +15,12 @@ class GcovProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return detect(content) + def matches_content(self, content: bytes, first_line: str, name: str) -> bool: + return b"0:Source:" in content.split(b"\n", 1)[0] + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: bytes, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_txt(content, report_builder_session) @@ -30,10 +31,6 @@ def process( detect_conditional = re.compile(r"^\s+((if\s?\()|(\} else if\s?\())").match -def detect(report): - return b"0:Source:" in report.split(b"\n", 1)[0] - - def from_txt(string: bytes, report_builder_session: ReportBuilderSession) -> Report: name, fix, ignored_lines = ( report_builder_session._report_filepath, diff --git a/services/report/languages/go.py b/services/report/languages/go.py index 8cfd048ca..71a653cbd 100644 --- a/services/report/languages/go.py +++ b/services/report/languages/go.py @@ -3,6 +3,7 @@ from io import BytesIO from itertools import groupby +import sentry_sdk from shared.reports.resources import Report from shared.utils import merge from shared.utils.merge import LineType, line_type, partials_to_line @@ -18,11 +19,12 @@ class GoProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: bytes, first_line: str, name: str) -> bool: return content[:6] == b"mode: " or ".go:" in first_line + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: bytes, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_txt(content, report_builder_session) diff --git a/services/report/languages/jacoco.py b/services/report/languages/jacoco.py index 1045165d9..07594d2cf 100644 --- a/services/report/languages/jacoco.py +++ b/services/report/languages/jacoco.py @@ -1,7 +1,8 @@ import logging -import typing from collections import defaultdict +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report from shared.utils.merge import LineType, branch_type from timestring import Date @@ -19,17 +20,18 @@ class JacocoProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "report") + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "report" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_xml(content, report_builder_session) -def from_xml(xml, report_builder_session: ReportBuilderSession): +def from_xml(xml: Element, report_builder_session: ReportBuilderSession): """ nr = line number mi = missed instructions @@ -40,7 +42,6 @@ def from_xml(xml, report_builder_session: ReportBuilderSession): path_fixer = report_builder_session.path_fixer yaml = report_builder_session.current_yaml ignored_lines = report_builder_session.ignored_lines - sessionid = report_builder_session.sessionid if read_yaml_field(yaml, ("codecov", "max_report_age"), "12h ago"): try: timestamp = next(xml.iter("sessioninfo")).get("start") diff --git a/services/report/languages/jetbrainsxml.py b/services/report/languages/jetbrainsxml.py index 7db68bbf1..87c5193c6 100644 --- a/services/report/languages/jetbrainsxml.py +++ b/services/report/languages/jetbrainsxml.py @@ -1,5 +1,6 @@ -import typing +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -11,20 +12,20 @@ class JetBrainsXMLProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "Root") + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "Root" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: return from_xml(content, report_builder.create_report_builder_session(name)) -def from_xml(xml, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid = ( +def from_xml(xml: Element, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, ) # dict of {"fileid": "path"} file_by_id = {} diff --git a/services/report/languages/lcov.py b/services/report/languages/lcov.py index 08e62f7e4..0fd83738c 100644 --- a/services/report/languages/lcov.py +++ b/services/report/languages/lcov.py @@ -1,9 +1,9 @@ import logging -import typing from collections import defaultdict from decimal import Decimal from io import BytesIO +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -17,22 +17,18 @@ class LcovProcessor(BaseLanguageProcessor): - def matches_content(self, content: bytes, first_line, name): - return detect(content) + def matches_content(self, content: bytes, first_line: str, name: str) -> bool: + return b"\nend_of_record" in content + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: bytes, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_txt(content, report_builder_session) -def detect(report): - return b"\nend_of_record" in report - - -# def from_txt(reports, fix, ignored_lines, sessionid): -def from_txt(reports, report_builder_session: ReportBuilderSession) -> Report: +def from_txt(reports: bytes, report_builder_session: ReportBuilderSession) -> Report: # http://ltp.sourceforge.net/coverage/lcov/geninfo.1.php # merge same files for string in reports.split(b"\nend_of_record"): diff --git a/services/report/languages/lua.py b/services/report/languages/lua.py index 012ede06e..a107dee13 100644 --- a/services/report/languages/lua.py +++ b/services/report/languages/lua.py @@ -1,6 +1,6 @@ import re -import typing +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -12,11 +12,12 @@ class LuaProcessor(BaseLanguageProcessor): - def matches_content(self, content: bytes, first_line, name): - return detect(content) + def matches_content(self, content: bytes, first_line: str, name: str) -> bool: + return content[:7] == b"=======" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: bytes, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_txt(content, report_builder_session) @@ -25,10 +26,6 @@ def process( docs = re.compile(r"^=+\n", re.M).split -def detect(report: bytes): - return report[:7] == b"=======" - - def from_txt(string: bytes, report_builder_session: ReportBuilderSession) -> Report: filename = None ignored_lines = report_builder_session.ignored_lines diff --git a/services/report/languages/mono.py b/services/report/languages/mono.py index fec2381ca..33fc31581 100644 --- a/services/report/languages/mono.py +++ b/services/report/languages/mono.py @@ -1,5 +1,6 @@ -import typing +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -11,21 +12,21 @@ class MonoProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "coverage" and content.find("assembly") is not None) + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "coverage" and content.find("assembly") is not None + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_xml(content, report_builder_session) -def from_xml(xml, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid = ( +def from_xml(xml: Element, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, ) # loop through methods for method in xml.iter("method"): diff --git a/services/report/languages/node.py b/services/report/languages/node.py index b1962e5ad..7f1f941d7 100644 --- a/services/report/languages/node.py +++ b/services/report/languages/node.py @@ -2,6 +2,7 @@ from collections import defaultdict from fractions import Fraction +import sentry_sdk from shared.reports.resources import Report from shared.utils.merge import partials_to_line @@ -15,13 +16,14 @@ class NodeProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - if not isinstance(content, dict): - return False - return all(isinstance(data, dict) for data in content.values()) + def matches_content(self, content: dict, first_line: str, name: str) -> bool: + return isinstance(content, dict) and all( + isinstance(data, dict) for data in content.values() + ) + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_json(content, report_builder_session) @@ -336,15 +338,16 @@ def jscoverage(_file, data, report_builder_session: ReportBuilderSession): ) -def from_json(report_dict, report_builder_session: ReportBuilderSession) -> Report: +def from_json( + report_dict: dict, report_builder_session: ReportBuilderSession +) -> Report: config = ( read_yaml_field(report_builder_session.current_yaml, ("parsers", "javascript")) or {} ) - fix, ignored_lines, sessionid = ( + fix, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, ) if config.get("enable_partials", False): diff --git a/services/report/languages/pycoverage.py b/services/report/languages/pycoverage.py index 3f50c8d96..92fb53fb4 100644 --- a/services/report/languages/pycoverage.py +++ b/services/report/languages/pycoverage.py @@ -1,5 +1,6 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -14,12 +15,12 @@ class PyCoverageProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name) -> bool: + def matches_content(self, content: dict, first_line: str, name: str) -> bool: return ( "meta" in content + and "files" in content and isinstance(content.get("meta"), dict) and "show_contexts" in content.get("meta") - and "files" in content ) def _normalize_label(self, testname) -> str: @@ -56,7 +57,10 @@ def _get_list_of_label_ids( return sorted(label_ids_for_line) - def process(self, name: str, content: Any, report_builder: ReportBuilder) -> Report: + @sentry_sdk.trace + def process( + self, name: str, content: dict, report_builder: ReportBuilder + ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) # Compressed pycoverage files will include a labels_table # Mapping label_idx: int --> label: str diff --git a/services/report/languages/rlang.py b/services/report/languages/rlang.py index 7eef32714..f2bbdf3f8 100644 --- a/services/report/languages/rlang.py +++ b/services/report/languages/rlang.py @@ -1,29 +1,29 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report, ReportFile from shared.reports.types import ReportLine +from services.path_fixer import PathFixer from services.report.languages.base import BaseLanguageProcessor from services.report.report_builder import ReportBuilder class RlangProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: dict, first_line: str, name: str) -> bool: return isinstance(content, dict) and content.get("uploader") == "R" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: - path_fixer, ignored_lines, sessionid, repo_yaml = ( + return from_json( + content, report_builder.path_fixer, report_builder.ignored_lines, report_builder.sessionid, - report_builder.repo_yaml, ) - return from_json(content, path_fixer, ignored_lines, sessionid) -def from_json(data_dict, fix, ignored_lines, sessionid): +def from_json(data_dict: dict, fix: PathFixer, ignored_lines: dict, sessionid: int): """ Report example diff --git a/services/report/languages/salesforce.py b/services/report/languages/salesforce.py index 7975e6ea7..9b63c6329 100644 --- a/services/report/languages/salesforce.py +++ b/services/report/languages/salesforce.py @@ -1,5 +1,4 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -11,21 +10,21 @@ class SalesforceProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: list, first_line: str, name: str) -> bool: return bool(content) and isinstance(content, list) and "name" in content[0] + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: list, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_json(content, report_builder_session) -def from_json(json, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid = ( +def from_json(json: list, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, ) for obj in json: if obj and obj.get("name") and obj.get("lines"): diff --git a/services/report/languages/scala.py b/services/report/languages/scala.py index af6b644c3..c63b49f64 100644 --- a/services/report/languages/scala.py +++ b/services/report/languages/scala.py @@ -1,5 +1,4 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -11,17 +10,18 @@ class ScalaProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: dict, first_line: str, name: str) -> bool: return "fileReports" in content + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_json(content, report_builder_session) -def from_json(data_dict, report_builder_session: ReportBuilderSession) -> Report: +def from_json(data_dict: dict, report_builder_session: ReportBuilderSession) -> Report: ignored_lines = report_builder_session.ignored_lines for f in data_dict["fileReports"]: filename = report_builder_session.path_fixer(f["filename"]) diff --git a/services/report/languages/scoverage.py b/services/report/languages/scoverage.py index bb7de8a6f..5aa42cc60 100644 --- a/services/report/languages/scoverage.py +++ b/services/report/languages/scoverage.py @@ -1,5 +1,6 @@ -import typing +from xml.etree.ElementTree import Element +import sentry_sdk from shared.helpers.numeric import maxint from shared.reports.resources import Report @@ -12,21 +13,21 @@ class SCoverageProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "statements") + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "statements" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_xml(content, report_builder_session) -def from_xml(xml, report_builder_session: ReportBuilderSession) -> Report: - path_fixer, ignored_lines, sessionid = ( +def from_xml(xml: Element, report_builder_session: ReportBuilderSession) -> Report: + path_fixer, ignored_lines = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, - report_builder_session.sessionid, ) ignore = [] diff --git a/services/report/languages/simplecov.py b/services/report/languages/simplecov.py index db9ea9dfa..ba90c4b7e 100644 --- a/services/report/languages/simplecov.py +++ b/services/report/languages/simplecov.py @@ -1,5 +1,4 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report from services.report.languages.base import BaseLanguageProcessor @@ -17,17 +16,18 @@ class SimplecovProcessor(BaseLanguageProcessor): """ - def matches_content(self, content, first_line, name): + def matches_content(self, content: dict, first_line: str, name: str) -> bool: return isinstance(content, dict) and content.get("command_name") == "RSpec" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: report_builder_session = report_builder.create_report_builder_session(name) return from_json(content, report_builder_session) -def from_json(json, report_builder_session: ReportBuilderSession) -> Report: +def from_json(json: dict, report_builder_session: ReportBuilderSession) -> Report: ignored_lines = report_builder_session.ignored_lines for data in json["files"]: fn = report_builder_session.path_fixer(data["filename"]) diff --git a/services/report/languages/tests/test_base.py b/services/report/languages/tests/test_base.py deleted file mode 100644 index bb0f43e3a..000000000 --- a/services/report/languages/tests/test_base.py +++ /dev/null @@ -1,7 +0,0 @@ -from services.report.languages import CloverProcessor -from test_utils.base import BaseTestCase - - -class TestBaseProcessor(BaseTestCase): - def test_name(self): - assert CloverProcessor.get_processor_name() == "CloverProcessor" diff --git a/services/report/languages/tests/unit/test_cobertura.py b/services/report/languages/tests/unit/test_cobertura.py index b1d4bf513..f9f0d9c78 100644 --- a/services/report/languages/tests/unit/test_cobertura.py +++ b/services/report/languages/tests/unit/test_cobertura.py @@ -465,7 +465,6 @@ def test_use_source_for_filename_if_one_path_source(self): /user/repo """ - processor = cobertura.CoberturaProcessor() report_builder = ReportBuilder( path_fixer=lambda path, bases_to_try: [ os.path.join(b, path) for b in bases_to_try @@ -492,7 +491,6 @@ def test_use_source_for_filename_if_one_bad_source(self): not a path """ - processor = cobertura.CoberturaProcessor() report_builder = ReportBuilder( path_fixer=lambda path, bases_to_try: path, ignored_lines={}, diff --git a/services/report/languages/tests/unit/test_coveralls.py b/services/report/languages/tests/unit/test_coveralls.py index 7d8062b20..05eb353af 100644 --- a/services/report/languages/tests/unit/test_coveralls.py +++ b/services/report/languages/tests/unit/test_coveralls.py @@ -22,8 +22,9 @@ class TestCoveralls(BaseTestCase): def test_detect(self): - assert coveralls.detect({"source_files": ""}) - assert not coveralls.detect({"coverage": ""}) + processor = coveralls.CoverallsProcessor() + assert processor.matches_content({"source_files": ""}, "", "") + assert not processor.matches_content({"coverage": ""}, "", "") def test_report(self): def fixes(path): diff --git a/services/report/languages/tests/unit/test_gap.py b/services/report/languages/tests/unit/test_gap.py index b8f03cfda..6ff9e3ee8 100644 --- a/services/report/languages/tests/unit/test_gap.py +++ b/services/report/languages/tests/unit/test_gap.py @@ -61,7 +61,13 @@ def test_report_from_dict(self): assert expected_result_archive == processed_report["archive"] def test_detect(self): - assert gap.detect(b"") is False - assert gap.detect(b'{"Type":"S","File":"lib/error.g","FileId":37}') is True - assert gap.detect(b'{"coverage"}') is False - assert gap.detect(b"-1.7") is False + processor = gap.GapProcessor() + assert processor.matches_content(b"", "", "") is False + assert ( + processor.matches_content( + b"", '{"Type":"S","File":"lib/error.g","FileId":37}', "" + ) + is True + ) + assert processor.matches_content(b'{"coverage"}', "", "") is False + assert processor.matches_content(b"-1.7", "", "") is False diff --git a/services/report/languages/tests/unit/test_gcov.py b/services/report/languages/tests/unit/test_gcov.py index 1afb78b0d..539806c97 100644 --- a/services/report/languages/tests/unit/test_gcov.py +++ b/services/report/languages/tests/unit/test_gcov.py @@ -339,10 +339,11 @@ def test_no_yaml(self): ) def test_detect(self): - assert gcov.detect(b" -: 0:Source:black") is True - assert gcov.detect(b"..... 0:Source:white") is True - assert gcov.detect(b"") is False - assert gcov.detect(b"0:Source") is False + processor = gcov.GcovProcessor() + assert processor.matches_content(b" -: 0:Source:black", "", "") is True + assert processor.matches_content(b"..... 0:Source:white", "", "") is True + assert processor.matches_content(b"", "", "") is False + assert processor.matches_content(b"0:Source", "", "") is False def test_ignored(self): report_builder = ReportBuilder( diff --git a/services/report/languages/tests/unit/test_lcov.py b/services/report/languages/tests/unit/test_lcov.py index 7ca0fdd72..ebf9bea9e 100644 --- a/services/report/languages/tests/unit/test_lcov.py +++ b/services/report/languages/tests/unit/test_lcov.py @@ -161,10 +161,11 @@ def fixes(path): assert expected_result_archive == processed_report["archive"] def test_detect(self): - assert lcov.detect(b"hello\nend_of_record\n") is True - assert lcov.detect(txt) is True - assert lcov.detect(b"hello_end_of_record") is False - assert lcov.detect(b"") is False + processor = lcov.LcovProcessor() + assert processor.matches_content(b"hello\nend_of_record\n", "", "") is True + assert processor.matches_content(txt, "", "") is True + assert processor.matches_content(b"hello_end_of_record", "", "") is False + assert processor.matches_content(b"", "", "") is False def test_negative_execution_count(self): text = "\n".join( diff --git a/services/report/languages/tests/unit/test_lua.py b/services/report/languages/tests/unit/test_lua.py index 4b75e0b7c..609e5cf9b 100644 --- a/services/report/languages/tests/unit/test_lua.py +++ b/services/report/languages/tests/unit/test_lua.py @@ -125,6 +125,7 @@ def test_report_with_line_breaks_in_the_beginning(self): assert expected_result_archive == processed_report["archive"] def test_detect(self): - assert lua.detect(b"=========") is True - assert lua.detect(b"=== fefef") is False - assert lua.detect(b"") is False + processor = lua.LuaProcessor() + assert processor.matches_content(b"=========", "", "") is True + assert processor.matches_content(b"=== fefef", "", "") is False + assert processor.matches_content(b"", "", "") is False diff --git a/services/report/languages/v1.py b/services/report/languages/v1.py index bf9b20992..41a07f206 100644 --- a/services/report/languages/v1.py +++ b/services/report/languages/v1.py @@ -1,5 +1,4 @@ -import typing - +import sentry_sdk from shared.reports.resources import Report from helpers.exceptions import CorruptRawReportError @@ -12,11 +11,12 @@ class VOneProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: dict, first_line: str, name: str) -> bool: return "coverage" in content or "RSpec" in content or "MiniTest" in content + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: dict, report_builder: ReportBuilder ) -> Report: if "RSpec" in content: content = content["RSpec"] @@ -52,7 +52,7 @@ def _list_to_dict(lines): return lines or {} -def from_json(json, report_builder_session: ReportBuilderSession) -> Report: +def from_json(json: str, report_builder_session: ReportBuilderSession) -> Report: if isinstance(json["coverage"], dict): # messages = json.get('messages', {}) for fn, lns in json["coverage"].items(): diff --git a/services/report/languages/vb.py b/services/report/languages/vb.py index be9cae1a7..edf15d1b9 100644 --- a/services/report/languages/vb.py +++ b/services/report/languages/vb.py @@ -1,29 +1,31 @@ -import typing +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report, ReportFile from shared.reports.types import ReportLine +from services.path_fixer import PathFixer from services.report.languages.base import BaseLanguageProcessor from services.report.report_builder import ReportBuilder class VbProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "results") + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "results" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: - path_fixer, ignored_lines, sessionid, repo_yaml = ( + return from_xml( + content, report_builder.path_fixer, report_builder.ignored_lines, report_builder.sessionid, - report_builder.repo_yaml, ) - return from_xml(content, path_fixer, ignored_lines, sessionid) -def from_xml(xml, fix, ignored_lines, sessionid): +def from_xml(xml: Element, fix: PathFixer, ignored_lines: dict, sessionid: int): report = Report() for module in xml.iter("module"): file_by_source = {} diff --git a/services/report/languages/vb2.py b/services/report/languages/vb2.py index 6d0017e7f..6a06a3b3a 100644 --- a/services/report/languages/vb2.py +++ b/services/report/languages/vb2.py @@ -1,29 +1,31 @@ -import typing +from xml.etree.ElementTree import Element +import sentry_sdk from shared.reports.resources import Report, ReportFile from shared.reports.types import ReportLine +from services.path_fixer import PathFixer from services.report.languages.base import BaseLanguageProcessor from services.report.report_builder import ReportBuilder class VbTwoProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): - return bool(content.tag == "CoverageDSPriv") + def matches_content(self, content: Element, first_line: str, name: str) -> bool: + return content.tag == "CoverageDSPriv" + @sentry_sdk.trace def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder + self, name: str, content: Element, report_builder: ReportBuilder ) -> Report: - path_fixer, ignored_lines, sessionid, repo_yaml = ( + return from_xml( + content, report_builder.path_fixer, report_builder.ignored_lines, report_builder.sessionid, - report_builder.repo_yaml, ) - return from_xml(content, path_fixer, ignored_lines, sessionid) -def from_xml(xml, fix, ignored_lines, sessionid): +def from_xml(xml: Element, fix: PathFixer, ignored_lines: dict, sessionid: int): file_by_source = {} for source in xml.iter("SourceFileNames"): filename = fix(source.find("SourceFileName").text.replace("\\", "/")) diff --git a/services/report/languages/xcode.py b/services/report/languages/xcode.py index 2e003fe29..355aa4e6d 100644 --- a/services/report/languages/xcode.py +++ b/services/report/languages/xcode.py @@ -1,6 +1,6 @@ -import typing from io import BytesIO +import sentry_sdk from shared.helpers.numeric import maxint from shared.reports.resources import Report from shared.reports.types import ReportLine @@ -19,7 +19,7 @@ class XCodeProcessor(BaseLanguageProcessor): - def matches_content(self, content, first_line, name): + def matches_content(self, content: bytes, first_line: str, name: str) -> bool: return name.endswith( ("app.coverage.txt", "framework.coverage.txt", "xctest.coverage.txt") ) or first_line.endswith( @@ -38,9 +38,8 @@ def matches_content(self, content, first_line, name): ) ) - def process( - self, name: str, content: typing.Any, report_builder: ReportBuilder - ) -> Report: + @sentry_sdk.trace + def process(self, name: str, content: str, report_builder: ReportBuilder) -> Report: return from_txt(content, report_builder.create_report_builder_session(name)) @@ -72,7 +71,7 @@ def get_partials_in_line(line): return partials -def from_txt(content, report_builder_session: ReportBuilderSession) -> Report: +def from_txt(content: bytes, report_builder_session: ReportBuilderSession) -> Report: path_fixer, ignored_lines, sessionid = ( report_builder_session.path_fixer, report_builder_session.ignored_lines, diff --git a/services/report/languages/xcodeplist.py b/services/report/languages/xcodeplist.py index 8b7fd8b19..0bb86edeb 100644 --- a/services/report/languages/xcodeplist.py +++ b/services/report/languages/xcodeplist.py @@ -1,33 +1,34 @@ import plistlib -import typing +import sentry_sdk from shared.reports.resources import Report, ReportFile from shared.reports.types import LineSession, ReportLine +from services.path_fixer import PathFixer from services.report.languages.base import BaseLanguageProcessor from services.report.report_builder import ReportBuilder class XCodePlistProcessor(BaseLanguageProcessor): - def matches_content(self, content: bytes, first_line, name): + def matches_content(self, content: bytes, first_line: str, name: str) -> bool: if name: return name.endswith("xccoverage.plist") if content.find(b'') > -1 and content.startswith(b" Report: - path_fixer, ignored_lines, sessionid, repo_yaml = ( + return from_xml( + content, report_builder.path_fixer, report_builder.ignored_lines, report_builder.sessionid, - report_builder.repo_yaml, ) - return from_xml(content, path_fixer, ignored_lines, sessionid) -def from_xml(xml: bytes, fix, ignored_lines, sessionid): +def from_xml(xml: bytes, fix: PathFixer, ignored_lines: dict, sessionid: int): objects = plistlib.loads(xml)["$objects"] _report = Report() diff --git a/services/report/report_builder.py b/services/report/report_builder.py index 21af93002..1354e2a35 100644 --- a/services/report/report_builder.py +++ b/services/report/report_builder.py @@ -28,7 +28,10 @@ def map_to_string(self): class ReportBuilderSession(object): def __init__( - self, report_builder, report_filepath, should_use_label_index: bool = False + self, + report_builder: "ReportBuilder", + report_filepath: str, + should_use_label_index: bool = False, ): self._report_builder = report_builder self._report_filepath = report_filepath @@ -240,7 +243,7 @@ def __init__( self, current_yaml: UserYaml, sessionid: int, - ignored_lines, + ignored_lines: dict, path_fixer: PathFixer, should_use_label_index: bool = False, ): diff --git a/services/report/report_processor.py b/services/report/report_processor.py index bfc821078..f1caf86c3 100644 --- a/services/report/report_processor.py +++ b/services/report/report_processor.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - +import json import logging -import numbers -from json import load -from typing import Any, Dict, List, Optional, Tuple +from typing import Literal +from xml.etree.ElementTree import Element import sentry_sdk from lxml import etree @@ -12,40 +10,40 @@ from helpers.exceptions import CorruptRawReportError from helpers.metrics import KiB, MiB -from services.report.languages import ( - BullseyeProcessor, - CloverProcessor, - CoberturaProcessor, - CoverallsProcessor, - CSharpProcessor, - DLSTProcessor, - ElmProcessor, - FlowcoverProcessor, - GapProcessor, - GcovProcessor, - GoProcessor, - JacocoProcessor, - JetBrainsXMLProcessor, - LcovProcessor, - LuaProcessor, - MonoProcessor, - NodeProcessor, - PyCoverageProcessor, - RlangProcessor, - SalesforceProcessor, - ScalaProcessor, - SCoverageProcessor, - SimplecovProcessor, - VbProcessor, - VbTwoProcessor, - VOneProcessor, - XCodePlistProcessor, - XCodeProcessor, -) +from services.report.languages.base import BaseLanguageProcessor from services.report.languages.helpers import remove_non_ascii from services.report.parser.types import ParsedUploadedReportFile from services.report.report_builder import ReportBuilder +from .languages.bullseye import BullseyeProcessor +from .languages.clover import CloverProcessor +from .languages.cobertura import CoberturaProcessor +from .languages.coveralls import CoverallsProcessor +from .languages.csharp import CSharpProcessor +from .languages.dlst import DLSTProcessor +from .languages.elm import ElmProcessor +from .languages.flowcover import FlowcoverProcessor +from .languages.gap import GapProcessor +from .languages.gcov import GcovProcessor +from .languages.go import GoProcessor +from .languages.jacoco import JacocoProcessor +from .languages.jetbrainsxml import JetBrainsXMLProcessor +from .languages.lcov import LcovProcessor +from .languages.lua import LuaProcessor +from .languages.mono import MonoProcessor +from .languages.node import NodeProcessor +from .languages.pycoverage import PyCoverageProcessor +from .languages.rlang import RlangProcessor +from .languages.salesforce import SalesforceProcessor +from .languages.scala import ScalaProcessor +from .languages.scoverage import SCoverageProcessor +from .languages.simplecov import SimplecovProcessor +from .languages.v1 import VOneProcessor +from .languages.vb import VbProcessor +from .languages.vb2 import VbTwoProcessor +from .languages.xcode import XCodeProcessor +from .languages.xcodeplist import XCodePlistProcessor + log = logging.getLogger(__name__) @@ -83,8 +81,13 @@ ) -def report_type_matching(report: ParsedUploadedReportFile) -> Tuple[Any, Optional[str]]: - first_line = remove_non_ascii(report.get_first_line().decode(errors="replace")) +def report_type_matching( + report: ParsedUploadedReportFile, first_line: str +) -> ( + tuple[bytes, Literal["txt"] | Literal["plist"]] + | tuple[dict | list, Literal["json"]] + | tuple[Element, Literal["xml"]] +): name = report.filename or "" raw_report = report.contents xcode_first_line_endings = ( @@ -111,29 +114,45 @@ def report_type_matching(report: ParsedUploadedReportFile) -> Tuple[Any, Optiona return raw_report, "txt" if raw_report.find(b'') >= 0 or name.endswith(".plist"): return raw_report, "plist" - if raw_report: - try: - processed = load(report.file_contents) - if processed != dict() and not isinstance(processed, numbers.Number): - return processed, "json" - except ValueError: - pass - if b"" in raw_report: - return None, None - try: - parser = etree.XMLParser(recover=True, resolve_entities=False) - processed = etree.fromstring(raw_report, parser=parser) - if processed is not None and len(processed) > 0: - return processed, "xml" - except ValueError: - pass + if not raw_report: + return raw_report, "txt" + try: + processed = json.load(report.file_contents) + if isinstance(processed, dict) or isinstance(processed, list): + return processed, "json" + except ValueError: + pass + try: + parser = etree.XMLParser(recover=True, resolve_entities=False) + processed = etree.fromstring(raw_report, parser=parser) + if processed is not None and len(processed) > 0: + return processed, "xml" + except ValueError: + pass return raw_report, "txt" -def get_possible_processors_list(report_type: str) -> List[Any]: - processor_dict: Dict[str, List[Any]] = { - "plist": [XCodePlistProcessor()], - "xml": [ +def process_report( + report: ParsedUploadedReportFile, report_builder: ReportBuilder +) -> Report | None: + report_filename = report.filename or "" + first_line = remove_non_ascii(report.get_first_line().decode(errors="replace")) + raw_report = report.contents + + if b"" in raw_report: + log.warning( + "Ignored report", + extra=dict(report_filename=report_filename, first_line=first_line[:100]), + ) + return None + + parsed_report, report_type = report_type_matching(report, first_line) + + processors: list[BaseLanguageProcessor] = [] + if report_type == "plist": + processors = [XCodePlistProcessor()] + elif report_type == "xml": + processors = [ BullseyeProcessor(), SCoverageProcessor(), JetBrainsXMLProcessor(), @@ -144,8 +163,12 @@ def get_possible_processors_list(report_type: str) -> List[Any]: VbProcessor(), VbTwoProcessor(), CoberturaProcessor(), - ], - "txt": [ + ] + elif report_type == "txt": + if parsed_report[-11:] == b"has no code": + # empty [dlst] + return None + processors = [ LcovProcessor(), GcovProcessor(), LuaProcessor(), @@ -153,8 +176,9 @@ def get_possible_processors_list(report_type: str) -> List[Any]: DLSTProcessor(), GoProcessor(), XCodeProcessor(), - ], - "json": [ + ] + elif report_type == "json": + processors = [ SalesforceProcessor(), ElmProcessor(), RlangProcessor(), @@ -166,60 +190,52 @@ def get_possible_processors_list(report_type: str) -> List[Any]: GapProcessor(), PyCoverageProcessor(), NodeProcessor(), - ], - } - return processor_dict.get(report_type, []) + ] - -def process_report( - report: ParsedUploadedReportFile, report_builder: ReportBuilder -) -> Optional[Report]: - name = report.filename or "" - first_line = remove_non_ascii(report.get_first_line().decode(errors="replace")) - parsed_report, report_type = report_type_matching(report) - if report_type == "txt" and parsed_report[-11:] == b"has no code": - # empty [dlst] - return None - processors = get_possible_processors_list(report_type) if report_type else [] for processor in processors: - if processor.matches_content(parsed_report, first_line, name): - sentry_sdk.metrics.incr( - "services.report.report_processor.parser", - tags={"type": type(processor).__name__}, - ) - RAW_REPORT_SIZE.labels(processor=processor.name).observe(report.size) - with RAW_REPORT_PROCESSOR_RUNTIME_SECONDS.labels( - processor=processor.name - ).time(): - try: - res = processor.process(name, parsed_report, report_builder) - RAW_REPORT_PROCESSOR_COUNTER.labels( - processor=processor.name, result="success" - ).inc() - return res - except CorruptRawReportError as e: - log.warning( - "Processor matched file but later a problem with file was discovered", - extra=dict( - processor_name=processor.name, - expected_format=e.expected_format, - corruption_error=e.corruption_error, - ), - exc_info=True, - ) - RAW_REPORT_PROCESSOR_COUNTER.labels( - processor=processor.name, result="corrupt_raw_report" - ).inc() - return None - except Exception: - RAW_REPORT_PROCESSOR_COUNTER.labels( - processor=processor.name, result="failure" - ).inc() - raise + if not processor.matches_content(parsed_report, first_line, report_filename): + continue + processor_name = type(processor).__name__ + + sentry_sdk.metrics.incr( + "services.report.report_processor.parser", + tags={"type": processor_name}, + ) + RAW_REPORT_SIZE.labels(processor=processor_name).observe(report.size) + with RAW_REPORT_PROCESSOR_RUNTIME_SECONDS.labels( + processor=processor_name + ).time(): + try: + res = processor.process(report_filename, parsed_report, report_builder) + RAW_REPORT_PROCESSOR_COUNTER.labels( + processor=processor_name, result="success" + ).inc() + return res + except CorruptRawReportError as e: + log.warning( + "Processor matched file but later a problem with file was discovered", + extra=dict( + processor_name=processor_name, + expected_format=e.expected_format, + corruption_error=e.corruption_error, + ), + exc_info=True, + ) + RAW_REPORT_PROCESSOR_COUNTER.labels( + processor=processor_name, result="corrupt_raw_report" + ).inc() + return None + except Exception: + RAW_REPORT_PROCESSOR_COUNTER.labels( + processor=processor_name, result="failure" + ).inc() + raise log.warning( "File format could not be recognized", extra=dict( - report_filename=name, first_line=first_line[:100], report_type=report_type + report_filename=report_filename, + first_line=first_line[:100], + report_type=report_type, ), ) return None diff --git a/services/report/tests/unit/test_process.py b/services/report/tests/unit/test_process.py index 6636fbcfe..210872b6e 100644 --- a/services/report/tests/unit/test_process.py +++ b/services/report/tests/unit/test_process.py @@ -790,21 +790,25 @@ def test_format_not_recognized(self, mocker): ) assert result is None assert mocked.called - mocked.assert_called_with(r) + mocked.assert_called_with(r, "") def test_process_report_exception_raised(self, mocker): class SpecialUnexpectedException(Exception): pass - mock_bad_processor = mocker.MagicMock( - matches_content=mocker.MagicMock(return_value=True), - process=mocker.MagicMock(side_effect=SpecialUnexpectedException()), - name="mock_bad_processor", + mocker.patch( + "services.report.report_processor.report_type_matching", + return_value=(b"", "plist"), + ) + mocker.patch( + "services.report.report_processor.XCodePlistProcessor.matches_content", + return_value=True, ) - mock_possible_list = mocker.patch( - "services.report.report_processor.get_possible_processors_list" + mocker.patch( + "services.report.report_processor.XCodePlistProcessor.process", + side_effect=SpecialUnexpectedException(), ) - mock_possible_list.return_value = [mock_bad_processor] + with pytest.raises(SpecialUnexpectedException): process.process_report( report=ParsedUploadedReportFile( @@ -817,19 +821,19 @@ class SpecialUnexpectedException(Exception): ) def test_process_report_corrupt_format(self, mocker): - mock_bad_processor = mocker.MagicMock( - matches_content=mocker.MagicMock(return_value=True), - process=mocker.MagicMock( - side_effect=CorruptRawReportError( - "expected_format", "error_explanation" - ) - ), - name="mock_bad_processor", + mocker.patch( + "services.report.report_processor.report_type_matching", + return_value=(b"", "plist"), ) mocker.patch( - "services.report.report_processor.get_possible_processors_list", - return_value=[mock_bad_processor], + "services.report.report_processor.XCodePlistProcessor.matches_content", + return_value=True, ) + mocker.patch( + "services.report.report_processor.XCodePlistProcessor.process", + side_effect=CorruptRawReportError("expected_format", "error_explanation"), + ) + res = process.process_report( report=ParsedUploadedReportFile( filename="/Users/path/to/app.coverage.txt", diff --git a/services/report/tests/unit/test_report_processor.py b/services/report/tests/unit/test_report_processor.py index 066e0103d..c770d5a4b 100644 --- a/services/report/tests/unit/test_report_processor.py +++ b/services/report/tests/unit/test_report_processor.py @@ -17,21 +17,24 @@ class TestReportTypeMatching(object): def test_report_type_matching(self): assert ( report_type_matching( - ParsedUploadedReportFile(filename="name", file_contents=BytesIO(b"")) + ParsedUploadedReportFile(filename="name", file_contents=BytesIO(b"")), + "", )[1] == "txt" ) assert ( report_type_matching( - ParsedUploadedReportFile(filename="name", file_contents=BytesIO(b"{}")) + ParsedUploadedReportFile(filename="name", file_contents=BytesIO(b"{}")), + "{}", )[1] - == "txt" + == "json" ) assert ( report_type_matching( ParsedUploadedReportFile( filename="name", file_contents=BytesIO(xcode_report.encode()) - ) + ), + "", )[1] == "txt" ) @@ -39,13 +42,15 @@ def test_report_type_matching(self): ParsedUploadedReportFile( filename="name", file_contents=BytesIO(json.dumps({"value": 1}).encode()), - ) + ), + "{value: 1}", ) == ({"value": 1}, "json") assert report_type_matching( ParsedUploadedReportFile( filename="name", file_contents=BytesIO(('\n\n{"value": 1}').encode()), - ) + ), + "", ) == ({"value": 1}, "json") assert ( report_type_matching( @@ -54,7 +59,8 @@ def test_report_type_matching(self): file_contents=BytesIO( 'source.scala'.encode() ), - ) + ), + "", )[1] == "xml" ) @@ -65,7 +71,8 @@ def test_report_type_matching(self): file_contents=BytesIO( '\n\n\n\n\nsource.scala'.encode() ), - ) + ), + "", )[1] == "xml" ) @@ -76,17 +83,20 @@ def test_report_type_matching(self): file_contents=BytesIO( '\ufeffsource.scala'.encode() ), - ) + ), + "", )[1] == "xml" ) assert report_type_matching( ParsedUploadedReportFile( filename="name", file_contents=BytesIO("normal file".encode()) - ) + ), + "normal file", ) == (b"normal file", "txt") assert report_type_matching( ParsedUploadedReportFile( filename="name", file_contents=BytesIO("1".encode()) - ) + ), + "1", ) == (b"1", "txt")