Skip to content

Commit

Permalink
Migrate to ruff.
Browse files Browse the repository at this point in the history
  • Loading branch information
wRAR committed Jan 30, 2025
1 parent c61c97c commit 5b2677b
Show file tree
Hide file tree
Showing 17 changed files with 196 additions and 112 deletions.
6 changes: 0 additions & 6 deletions .bandit.yml

This file was deleted.

15 changes: 0 additions & 15 deletions .flake8

This file was deleted.

26 changes: 5 additions & 21 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,23 +1,7 @@
repos:
- repo: https://github.com/PyCQA/bandit
rev: 1.7.10
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.3
hooks:
- id: bandit
args: [-r, -c, .bandit.yml]
- repo: https://github.com/PyCQA/flake8
rev: 7.1.1
hooks:
- id: flake8
- repo: https://github.com/psf/black.git
rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.19.1
hooks:
- id: pyupgrade
args: [--py39-plus]
- id: ruff
args: [ --fix ]
- id: ruff-format
5 changes: 2 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#!/usr/bin/env python

import os
import sys
from pathlib import Path

# Get the project root dir, which is the parent dir of the current working directory.
cwd = os.getcwd()
project_root = os.path.dirname(cwd)
project_root = str(Path.cwd().parent)

# Insert the project root dir as the first element in the PYTHONPATH.
# This lets us ensure that the source package is imported, and that its
Expand Down
7 changes: 3 additions & 4 deletions docs/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
from pathlib import Path

from sybil import Sybil

Expand All @@ -16,9 +16,8 @@


def load_selector(filename, **kwargs):
input_path = os.path.join(os.path.dirname(__file__), "_static", filename)
with open(input_path, encoding="utf-8") as input_file:
return Selector(text=input_file.read(), **kwargs)
input_path = Path(__file__).parent / "_static" / filename
return Selector(text=input_path.read_text(encoding="utf-8"), **kwargs)


def setup(namespace):
Expand Down
6 changes: 3 additions & 3 deletions parsel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
"xpathfuncs",
]

from parsel import xpathfuncs # NOQA
from parsel.csstranslator import css2xpath # NOQA
from parsel.selector import Selector, SelectorList # NOQA
from parsel import xpathfuncs
from parsel.csstranslator import css2xpath
from parsel.selector import Selector, SelectorList

xpathfuncs.setup()
3 changes: 1 addition & 2 deletions parsel/csstranslator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@


class XPathExpr(OriginalXPathExpr):

textnode: bool = False
attribute: str | None = None

Expand Down Expand Up @@ -143,5 +142,5 @@ def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:


def css2xpath(query: str) -> str:
"Return translated XPath version of a given CSS query"
"""Return translated XPath version of a given CSS query"""
return _translator.css_to_xpath(query)
35 changes: 18 additions & 17 deletions parsel/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
import json
import typing
import warnings
from collections.abc import Mapping
from io import BytesIO
from re import Pattern
from typing import Any, Literal, SupportsIndex, TypedDict, TypeVar, Union
from typing import TYPE_CHECKING, Any, Literal, SupportsIndex, TypedDict, TypeVar, Union

import jmespath
from lxml import etree, html
Expand All @@ -18,6 +16,11 @@
from .csstranslator import GenericTranslator, HTMLTranslator
from .utils import extract_regex, flatten, iflatten, shorten

if TYPE_CHECKING:
from collections.abc import Mapping
from re import Pattern


_SelectorType = TypeVar("_SelectorType", bound="Selector")
_ParserType = Union[etree.XMLParser, etree.HTMLParser]
# simplified _OutputMethodArg from types-lxml
Expand Down Expand Up @@ -50,7 +53,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:


class CTGroupValue(TypedDict):
_parser: type[etree.XMLParser] | type[html.HTMLParser]
_parser: type[etree.XMLParser | html.HTMLParser]
_csstranslator: GenericTranslator | HTMLTranslator
_tostring_method: _TostringMethodType

Expand Down Expand Up @@ -97,7 +100,8 @@ def create_root_node(
if "use XML_PARSE_HUGE option" in error.message:
warnings.warn(
f"Input data is too big. Upgrade to lxml "
f"{lxml_huge_tree_version} or later for huge_tree support."
f"{lxml_huge_tree_version} or later for huge_tree support.",
stacklevel=2,
)
if root is None:
root = etree.fromstring(b"<html/>", parser=parser, base_url=base_url)
Expand All @@ -124,8 +128,7 @@ def __getitem__(
o = super().__getitem__(pos)
if isinstance(pos, slice):
return self.__class__(typing.cast("SelectorList[_SelectorType]", o))
else:
return typing.cast(_SelectorType, o)
return typing.cast(_SelectorType, o)

def __getstate__(self) -> None:
raise TypeError("can't pickle SelectorList objects")
Expand Down Expand Up @@ -341,7 +344,7 @@ def _get_root_type(root: Any, *, input_type: str | None) -> str:
f"and {input_type!r} as type."
)
return _xml_or_html(input_type)
elif isinstance(root, (dict, list)) or _is_valid_json(root):
if isinstance(root, (dict, list)) or _is_valid_json(root):
return "json"
return input_type or "json"

Expand Down Expand Up @@ -392,14 +395,14 @@ class Selector:
"""

__slots__ = [
"namespaces",
"type",
"__weakref__",
"_expr",
"_huge_tree",
"root",
"_text",
"body",
"__weakref__",
"namespaces",
"root",
"type",
]

_default_namespaces = {
Expand Down Expand Up @@ -541,8 +544,7 @@ def jmespath(
def make_selector(x: Any) -> _SelectorType: # closure function
if isinstance(x, str):
return self.__class__(text=x, _expr=query, type="text")
else:
return self.__class__(root=x, _expr=query)
return self.__class__(root=x, _expr=query)

result = [make_selector(x) for x in result]
return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result))
Expand Down Expand Up @@ -707,10 +709,9 @@ def get(self) -> Any:
except (AttributeError, TypeError):
if self.root is True:
return "1"
elif self.root is False:
if self.root is False:
return "0"
else:
return str(self.root)
return str(self.root)

extract = get

Expand Down
11 changes: 6 additions & 5 deletions parsel/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations

import re
from collections.abc import Iterable, Iterator
from re import Match, Pattern
from typing import Any, cast
from typing import TYPE_CHECKING, Any, cast

from w3lib.html import replace_entities as w3lib_replace_entities

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator


def flatten(x: Iterable[Any]) -> list[Any]:
"""flatten(sequence) -> list
Expand Down Expand Up @@ -61,7 +62,7 @@ def _is_listlike(x: Any) -> bool:


def extract_regex(
regex: str | Pattern[str], text: str, replace_entities: bool = True
regex: str | re.Pattern[str], text: str, replace_entities: bool = True
) -> list[str]:
"""Extract a list of strings from the given text using the following policies:
* if the regex contains a named group called "extract" that will be returned
Expand All @@ -74,7 +75,7 @@ def extract_regex(
if "extract" in regex.groupindex:
# named group
try:
extracted = cast(Match[str], regex.search(text)).group("extract")
extracted = cast(re.Match[str], regex.search(text)).group("extract")
except AttributeError:
strings = []
else:
Expand Down
5 changes: 1 addition & 4 deletions parsel/xpathfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,4 @@ def has_class(context: Any, *classes: str) -> bool:
return False
node_cls = " " + node_cls + " "
node_cls = replace_html5_whitespaces(" ", node_cls)
for cls in classes:
if " " + cls + " " not in node_cls:
return False
return True
return all(" " + cls + " " in node_cls for cls in classes)
126 changes: 126 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
[tool.ruff.lint]
extend-select = [
# flake8-bugbear
"B",
# flake8-comprehensions
"C4",
# pydocstyle
"D",
# flake8-future-annotations
"FA",
# flynt
"FLY",
# refurb
"FURB",
# isort
"I",
# flake8-implicit-str-concat
"ISC",
# flake8-logging
"LOG",
# Perflint
"PERF",
# pygrep-hooks
"PGH",
# flake8-pie
"PIE",
# pylint
"PL",
# flake8-use-pathlib
"PTH",
# flake8-pyi
"PYI",
# flake8-quotes
"Q",
# flake8-return
"RET",
# flake8-raise
"RSE",
# Ruff-specific rules
"RUF",
# flake8-bandit
"S",
# flake8-simplify
"SIM",
# flake8-slots
"SLOT",
# flake8-debugger
"T10",
# flake8-type-checking
"TC",
# pyupgrade
"UP",
# pycodestyle warnings
"W",
# flake8-2020
"YTT",
]
ignore = [
# Within an `except` clause, raise exceptions with `raise ... from`
"B904",
# Missing docstring in public module
"D100",
# Missing docstring in public class
"D101",
# Missing docstring in public method
"D102",
# Missing docstring in public function
"D103",
# Missing docstring in public package
"D104",
# Missing docstring in magic method
"D105",
# Missing docstring in public nested class
"D106",
# Missing docstring in __init__
"D107",
# One-line docstring should fit on one line with quotes
"D200",
# No blank lines allowed after function docstring
"D202",
# 1 blank line required between summary line and description
"D205",
# Multi-line docstring closing quotes should be on a separate line
"D209",
# First line should end with a period
"D400",
# First line should be in imperative mood; try rephrasing
"D401",
# First line should not be the function's "signature"
"D402",
# First word of the first line should be properly capitalized
"D403",
# No blank lines allowed between a section header and its content
"D412",
# Too many return statements
"PLR0911",
# Too many branches
"PLR0912",
# Too many arguments in function definition
"PLR0913",
# Too many statements
"PLR0915",
# Magic value used in comparison
"PLR2004",
# String contains an ambiguous Unicode character
"RUF001",
# Docstring contains an ambiguous Unicode character
"RUF002",
# Comment contains an ambiguous Unicode character
"RUF003",
# Mutable class attributes should be annotated with `typing.ClassVar`
"RUF012",
# Use of `assert` detected
"S101",
# Using lxml to parse untrusted data is known to be vulnerable to XML attacks
"S320",

# pending: https://github.com/scrapy/parsel/issues/312
"B019",
]

[tool.ruff.lint.per-file-ignores]
"tests/typing/selector.py" = ["F841"]

[tool.ruff.lint.pydocstyle]
convention = "pep257"
Loading

0 comments on commit 5b2677b

Please sign in to comment.