Migrate to ruff.

scrapy · Jan 30, 2025 · 5b2677b · 5b2677b
1 parent c61c97c
commit 5b2677b
Show file tree

Hide file tree

Showing 17 changed files with 196 additions and 112 deletions.
diff --git a/.bandit.yml b/.bandit.yml
diff --git a/.flake8 b/.flake8
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,23 +1,7 @@
 repos:
-- repo: https://github.com/PyCQA/bandit
-  rev: 1.7.10
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.9.3
   hooks:
-  - id: bandit
-    args: [-r, -c, .bandit.yml]
-- repo: https://github.com/PyCQA/flake8
-  rev: 7.1.1
-  hooks:
-  - id: flake8
-- repo: https://github.com/psf/black.git
-  rev: 24.10.0
-  hooks:
-  - id: black
-- repo: https://github.com/pycqa/isort
-  rev: 5.13.2
-  hooks:
-  - id: isort
--   repo: https://github.com/asottile/pyupgrade
-    rev: v3.19.1
-    hooks:
-    -   id: pyupgrade
-        args: [--py39-plus]
+    - id: ruff
+      args: [ --fix ]
+    - id: ruff-format
diff --git a/docs/conf.py b/docs/conf.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python
 
-import os
 import sys
+from pathlib import Path
 
-# Get the project root dir, which is the parent dir of this
-cwd = os.getcwd()
-project_root = os.path.dirname(cwd)
+# Get the project root dir, which is the parent of the current working directory.
+project_root = str(Path.cwd().parent)
 
 # Insert the project root dir as the first element in the PYTHONPATH.
 # This lets us ensure that the source package is imported, and that its

diff --git a/docs/conftest.py b/docs/conftest.py
@@ -1,5 +1,5 @@
-import os
 from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
+from pathlib import Path
 
 from sybil import Sybil
 
@@ -16,9 +16,8 @@
 
 
 def load_selector(filename, **kwargs):
-    input_path = os.path.join(os.path.dirname(__file__), "_static", filename)
-    with open(input_path, encoding="utf-8") as input_file:
-        return Selector(text=input_file.read(), **kwargs)
+    input_path = Path(__file__).parent / "_static" / filename
+    return Selector(text=input_path.read_text(encoding="utf-8"), **kwargs)
 
 
 def setup(namespace):

diff --git a/parsel/__init__.py b/parsel/__init__.py
@@ -13,8 +13,8 @@
     "xpathfuncs",
 ]
 
-from parsel import xpathfuncs  # NOQA
-from parsel.csstranslator import css2xpath  # NOQA
-from parsel.selector import Selector, SelectorList  # NOQA
+from parsel import xpathfuncs
+from parsel.csstranslator import css2xpath
+from parsel.selector import Selector, SelectorList
 
 xpathfuncs.setup()
diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py
@@ -15,7 +15,6 @@
 
 
 class XPathExpr(OriginalXPathExpr):
-
     textnode: bool = False
     attribute: str | None = None
 
@@ -143,5 +142,5 @@ def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
 
 
 def css2xpath(query: str) -> str:
-    "Return translated XPath version of a given CSS query"
+    """Return translated XPath version of a given CSS query"""
     return _translator.css_to_xpath(query)
diff --git a/parsel/selector.py b/parsel/selector.py
@@ -6,10 +6,8 @@
 import json
 import typing
 import warnings
-from collections.abc import Mapping
 from io import BytesIO
-from re import Pattern
-from typing import Any, Literal, SupportsIndex, TypedDict, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Literal, SupportsIndex, TypedDict, TypeVar, Union
 
 import jmespath
 from lxml import etree, html
@@ -18,6 +16,11 @@
 from .csstranslator import GenericTranslator, HTMLTranslator
 from .utils import extract_regex, flatten, iflatten, shorten
 
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+    from re import Pattern
+
+
 _SelectorType = TypeVar("_SelectorType", bound="Selector")
 _ParserType = Union[etree.XMLParser, etree.HTMLParser]
 # simplified _OutputMethodArg from types-lxml
@@ -50,7 +53,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 
 
 class CTGroupValue(TypedDict):
-    _parser: type[etree.XMLParser] | type[html.HTMLParser]
+    _parser: type[etree.XMLParser | html.HTMLParser]
     _csstranslator: GenericTranslator | HTMLTranslator
     _tostring_method: _TostringMethodType
 
@@ -97,7 +100,8 @@ def create_root_node(
             if "use XML_PARSE_HUGE option" in error.message:
                 warnings.warn(
                     f"Input data is too big. Upgrade to lxml "
-                    f"{lxml_huge_tree_version} or later for huge_tree support."
+                    f"{lxml_huge_tree_version} or later for huge_tree support.",
+                    stacklevel=2,
                 )
     if root is None:
         root = etree.fromstring(b"<html/>", parser=parser, base_url=base_url)
@@ -124,8 +128,7 @@ def __getitem__(
         o = super().__getitem__(pos)
         if isinstance(pos, slice):
             return self.__class__(typing.cast("SelectorList[_SelectorType]", o))
-        else:
-            return typing.cast(_SelectorType, o)
+        return typing.cast(_SelectorType, o)
 
     def __getstate__(self) -> None:
         raise TypeError("can't pickle SelectorList objects")
@@ -341,7 +344,7 @@ def _get_root_type(root: Any, *, input_type: str | None) -> str:
                 f"and {input_type!r} as type."
             )
         return _xml_or_html(input_type)
-    elif isinstance(root, (dict, list)) or _is_valid_json(root):
+    if isinstance(root, (dict, list)) or _is_valid_json(root):
         return "json"
     return input_type or "json"
 
@@ -392,14 +395,14 @@ class Selector:
     """
 
     __slots__ = [
-        "namespaces",
-        "type",
+        "__weakref__",
         "_expr",
         "_huge_tree",
-        "root",
         "_text",
         "body",
-        "__weakref__",
+        "namespaces",
+        "root",
+        "type",
     ]
 
     _default_namespaces = {
@@ -541,8 +544,7 @@ def jmespath(
         def make_selector(x: Any) -> _SelectorType:  # closure function
             if isinstance(x, str):
                 return self.__class__(text=x, _expr=query, type="text")
-            else:
-                return self.__class__(root=x, _expr=query)
+            return self.__class__(root=x, _expr=query)
 
         result = [make_selector(x) for x in result]
         return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result))
@@ -707,10 +709,9 @@ def get(self) -> Any:
         except (AttributeError, TypeError):
             if self.root is True:
                 return "1"
-            elif self.root is False:
+            if self.root is False:
                 return "0"
-            else:
-                return str(self.root)
+            return str(self.root)
 
     extract = get
 

diff --git a/parsel/utils.py b/parsel/utils.py
@@ -1,12 +1,13 @@
 from __future__ import annotations
 
 import re
-from collections.abc import Iterable, Iterator
-from re import Match, Pattern
-from typing import Any, cast
+from typing import TYPE_CHECKING, Any, cast
 
 from w3lib.html import replace_entities as w3lib_replace_entities
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Iterator
+
 
 def flatten(x: Iterable[Any]) -> list[Any]:
     """flatten(sequence) -> list
@@ -61,7 +62,7 @@ def _is_listlike(x: Any) -> bool:
 
 
 def extract_regex(
-    regex: str | Pattern[str], text: str, replace_entities: bool = True
+    regex: str | re.Pattern[str], text: str, replace_entities: bool = True
 ) -> list[str]:
     """Extract a list of strings from the given text/encoding using the following policies:
     * if the regex contains a named group called "extract" that will be returned
@@ -74,7 +75,7 @@ def extract_regex(
     if "extract" in regex.groupindex:
         # named group
         try:
-            extracted = cast(Match[str], regex.search(text)).group("extract")
+            extracted = cast(re.Match[str], regex.search(text)).group("extract")
         except AttributeError:
             strings = []
         else:

diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
@@ -54,7 +54,4 @@ def has_class(context: Any, *classes: str) -> bool:
         return False
     node_cls = " " + node_cls + " "
     node_cls = replace_html5_whitespaces(" ", node_cls)
-    for cls in classes:
-        if " " + cls + " " not in node_cls:
-            return False
-    return True
+    return all(" " + cls + " " in node_cls for cls in classes)
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,126 @@
+[tool.ruff.lint]
+extend-select = [
+    # flake8-bugbear
+    "B",
+    # flake8-comprehensions
+    "C4",
+    # pydocstyle
+    "D",
+    # flake8-future-annotations
+    "FA",
+    # flynt
+    "FLY",
+    # refurb
+    "FURB",
+    # isort
+    "I",
+    # flake8-implicit-str-concat
+    "ISC",
+    # flake8-logging
+    "LOG",
+    # Perflint
+    "PERF",
+    # pygrep-hooks
+    "PGH",
+    # flake8-pie
+    "PIE",
+    # pylint
+    "PL",
+    # flake8-use-pathlib
+    "PTH",
+    # flake8-pyi
+    "PYI",
+    # flake8-quotes
+    "Q",
+    # flake8-return
+    "RET",
+    # flake8-raise
+    "RSE",
+    # Ruff-specific rules
+    "RUF",
+    # flake8-bandit
+    "S",
+    # flake8-simplify
+    "SIM",
+    # flake8-slots
+    "SLOT",
+    # flake8-debugger
+    "T10",
+    # flake8-type-checking
+    "TC",
+    # pyupgrade
+    "UP",
+    # pycodestyle warnings
+    "W",
+    # flake8-2020
+    "YTT",
+]
+ignore = [
+    # Within an `except` clause, raise exceptions with `raise ... from`
+    "B904",
+    # Missing docstring in public module
+    "D100",
+    # Missing docstring in public class
+    "D101",
+    # Missing docstring in public method
+    "D102",
+    # Missing docstring in public function
+    "D103",
+    # Missing docstring in public package
+    "D104",
+    # Missing docstring in magic method
+    "D105",
+    # Missing docstring in public nested class
+    "D106",
+    # Missing docstring in __init__
+    "D107",
+    # One-line docstring should fit on one line with quotes
+    "D200",
+    # No blank lines allowed after function docstring
+    "D202",
+    # 1 blank line required between summary line and description
+    "D205",
+    # Multi-line docstring closing quotes should be on a separate line
+    "D209",
+    # First line should end with a period
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    # First line should not be the function's "signature"
+    "D402",
+    # First word of the first line should be properly capitalized
+    "D403",
+    # No blank lines allowed between a section header and its content
+    "D412",
+    # Too many return statements
+    "PLR0911",
+    # Too many branches
+    "PLR0912",
+    # Too many arguments in function definition
+    "PLR0913",
+    # Too many statements
+    "PLR0915",
+    # Magic value used in comparison
+    "PLR2004",
+    # String contains ambiguous {}.
+    "RUF001",
+    # Docstring contains ambiguous {}.
+    "RUF002",
+    # Comment contains ambiguous {}.
+    "RUF003",
+    # Mutable class attributes should be annotated with `typing.ClassVar`
+    "RUF012",
+    # Use of `assert` detected
+    "S101",
+    # Using lxml to parse untrusted data is known to be vulnerable to XML attacks
+    "S320",
+
+    # pending: https://github.com/scrapy/parsel/issues/312
+    "B019",
+]
+
+[tool.ruff.lint.per-file-ignores]
+"tests/typing/selector.py" = ["F841"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "pep257"