
Commit 7c5add3

Dropping RequestRetrying altogether
Fix #987
Yomguithereal committed Oct 2, 2024
1 parent cc0ac92 commit 7c5add3
Showing 3 changed files with 19 additions and 24 deletions.
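
In short, the commit drops the RequestRetrying wrapper, which silently injected raise_on_statuses into every retried call, and instead threads an explicit raise_on_statuses keyword from the executors (and the crawler) down to each request. Below is a minimal sketch of the old pattern versus the new one, using simplified, hypothetical names rather than minet's actual API:

# Minimal sketch of the pattern dropped vs. the one adopted
# (hypothetical, simplified names; not minet's actual classes).
from typing import Container, Optional

from tenacity import Retrying, retry_if_exception_type, stop_after_attempt


class InvalidStatusError(Exception):
    pass


def fetch(url: str, raise_on_statuses: Optional[Container[int]] = None) -> int:
    status = 200  # stand-in for an actual HTTP call
    if raise_on_statuses is not None and status in raise_on_statuses:
        raise InvalidStatusError(status)
    return status


# Before: a Retrying subclass injected the kwarg into every wrapped call.
class InjectingRetrying(Retrying):
    def __init__(self, *args, invalid_statuses=None, **kwargs):
        self._invalid_statuses = invalid_statuses
        super().__init__(*args, **kwargs)

    def __call__(self, fn, *args, **kwargs):
        if self._invalid_statuses is not None:
            kwargs["raise_on_statuses"] = self._invalid_statuses
        return super().__call__(fn, *args, **kwargs)


# After: the caller passes the kwarg explicitly, so a stock Retrying suffices.
retryer = Retrying(
    retry=retry_if_exception_type(InvalidStatusError),
    stop=stop_after_attempt(3),
    reraise=True,
)
retryer(fetch, "https://example.com", raise_on_statuses={429, 503})

Passing the keyword explicitly keeps the retryer a stock tenacity Retrying and makes the status handling visible at the call site.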
15 changes: 8 additions & 7 deletions minet/crawl/crawler.py
@@ -146,6 +146,11 @@ def __init__(
             "cancel_event": crawler.executor.cancel_event,
         }
 
+        if self.crawler.executor.retry_on_statuses is not None:
+            self.default_kwargs["raise_on_statuses"] = (
+                self.crawler.executor.retry_on_statuses
+            )
+
         if use_pycurl:
             del self.default_kwargs["pool_manager"]
             self.default_kwargs["use_pycurl"] = True
@@ -200,16 +205,12 @@ def __call__(
         # and the subsequent spider processing
         response = None
 
-        # NOTE: the function takes "url" and "raise_on_statuses" because of RequestRetrying quirks
-        def retryable_work(
-            url: str, raise_on_statuses=None
-        ) -> Optional[Tuple["Response", Any, Any]]:
+        # NOTE: the function takes "url" so that the executor may format the warning's epilog
+        def retryable_work(url: str) -> Optional[Tuple["Response", Any, Any]]:
             nonlocal response
 
             try:
-                response = request_fn(
-                    url, raise_on_statuses=raise_on_statuses, **kwargs
-                )
+                response = request_fn(url, **kwargs)
 
             except CancelledRequestError:
                 return
8 changes: 8 additions & 0 deletions minet/executors.py
@@ -18,6 +18,7 @@
     Union,
     Tuple,
     Awaitable,
+    Container,
     Any,
     TYPE_CHECKING,
     Literal,
@@ -365,6 +366,7 @@ def __init__(
         infer_redirection: bool = False,
         canonicalize: bool = False,
         known_encoding: Optional[str] = None,
+        raise_on_statuses: Optional[Container[int]] = None,
         callback: Optional[
             Union[
                 Callable[[ItemType, str, Response], CallbackResultType],
@@ -398,6 +400,7 @@ def __init__(
             "follow_js_relocation": follow_js_relocation,
             "infer_redirection": infer_redirection,
             "canonicalize": canonicalize,
+            "raise_on_statuses": raise_on_statuses,
         }
 
         if use_pycurl:
@@ -490,6 +493,7 @@ def __init__(
     ):
         self.cancel_event = Event()
         self.local_context = threading.local()
+        self.retry_on_statuses = None
 
         if retry:
 
@@ -505,6 +509,8 @@ def epilog(retry_state: RetryCallState) -> str:
 
             default_retryer_kwargs.update(retryer_kwargs or {})
 
+            self.retry_on_statuses = default_retryer_kwargs.get("retry_on_statuses")
+
            def init_local_context():
                self.local_context.retryer = create_request_retryer(
                    **default_retryer_kwargs
@@ -615,6 +621,7 @@ def request(
             use_pycurl=use_pycurl,
             compressed=compressed,
             known_encoding=known_encoding,
+            raise_on_statuses=self.retry_on_statuses,
             callback=callback,
         )
 
@@ -723,6 +730,7 @@ def resolve(
             follow_js_relocation=follow_js_relocation,
             infer_redirection=infer_redirection,
             canonicalize=canonicalize,
+            raise_on_statuses=self.retry_on_statuses,
             callback=callback,
         )
 
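
On the executor side, the sketch below illustrates the new data flow with simplified, hypothetical stand-ins (ExecutorSketch is not minet's HTTPThreadPoolExecutor): the retryer's retry_on_statuses is remembered at construction time and forwarded as raise_on_statuses on every request, so an offending status surfaces as an exception a retryer can catch.

# Rough sketch of the new data flow (simplified stand-ins, not minet's code).
from typing import Container, Optional


class InvalidStatusError(Exception):
    pass


def fake_request(url: str, raise_on_statuses: Optional[Container[int]] = None) -> int:
    status = 503  # pretend the server answered with a retryable status
    if raise_on_statuses is not None and status in raise_on_statuses:
        raise InvalidStatusError(status)
    return status


class ExecutorSketch:
    def __init__(self, retryer_kwargs: Optional[dict] = None):
        kwargs = dict(retryer_kwargs or {})
        # Mirrors self.retry_on_statuses = default_retryer_kwargs.get(...) above.
        self.retry_on_statuses: Optional[Container[int]] = kwargs.get(
            "retry_on_statuses"
        )

    def request(self, url: str) -> int:
        # Mirrors raise_on_statuses=self.retry_on_statuses in request()/resolve().
        return fake_request(url, raise_on_statuses=self.retry_on_statuses)


executor = ExecutorSketch(retryer_kwargs={"retry_on_statuses": {429, 503}})
try:
    executor.request("https://example.com")
except InvalidStatusError:
    pass  # a retryer wrapping this call would catch the exception and retry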
20 changes: 3 additions & 17 deletions minet/web.py
@@ -1228,20 +1228,6 @@ def __call__(self, retry_state: RetryCallState) -> float:
         return max(0, min(result, self.max))
 
 
-class RequestRetrying(Retrying):
-    def __init__(
-        self, *args, invalid_statuses: Optional[Container[int]] = None, **kwargs
-    ):
-        self._invalid_statuses = invalid_statuses
-        super().__init__(*args, **kwargs)
-
-    def __call__(self, fn, *args, **kwargs):
-        if self._invalid_statuses is not None:
-            kwargs["raise_on_statuses"] = self._invalid_statuses
-
-        return super().__call__(fn, *args, **kwargs)
-
-
 def create_request_retryer(
     min: float = 10,
     max: float = ONE_DAY,
@@ -1253,7 +1239,7 @@ def create_request_retryer(
     predicate: Optional[Callable[[BaseException], bool]] = None,
     epilog: Optional[Callable[[RetryCallState], Optional[str]]] = None,
     cancel_event: Optional[Event] = None,
-) -> RequestRetrying:
+) -> Retrying:
     # By default we only retry network issues, such as Internet being cut off etc.
     retryable_exception_types = [
         # urllib3 errors
@@ -1339,7 +1325,7 @@ def status_predicate(exc: BaseException) -> bool:
             lambda _: not cancel_event.is_set()
         )
 
-    return RequestRetrying(invalid_statuses=retry_on_statuses, **retrying_kwargs)
+    return Retrying(**retrying_kwargs)
 
 
 def retrying_method(attr="retryer"):
@@ -1363,7 +1349,7 @@ def __init__(self, **kwargs):
         self.kwargs = kwargs
         self.local_context = threading.local()
 
-    def acquire(self) -> RequestRetrying:
+    def acquire(self) -> Retrying:
         retryer = getattr(self.local_context, "retryer", None)
 
         if retryer is None:
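
With the wrapper gone, status-based retrying can be expressed with a plain tenacity Retrying and a predicate over the raised exception, roughly the shape create_request_retryer builds around its status_predicate. The sketch below uses made-up exception and status names and is not the library's actual code:

from tenacity import Retrying, retry_if_exception, stop_after_attempt, wait_fixed


class InvalidStatusError(Exception):
    def __init__(self, status: int):
        super().__init__(status)
        self.status = status


RETRY_ON_STATUSES = frozenset({429, 503})


def status_predicate(exc: BaseException) -> bool:
    # Retry only when the raised status belongs to the configured set.
    return isinstance(exc, InvalidStatusError) and exc.status in RETRY_ON_STATUSES


retryer = Retrying(
    retry=retry_if_exception(status_predicate),
    stop=stop_after_attempt(3),
    wait=wait_fixed(0.1),
    reraise=True,
)


def flaky():
    raise InvalidStatusError(503)


try:
    retryer(flaky)
except InvalidStatusError:
    pass  # after 3 attempts the last exception is re-raised (reraise=True)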
