From 661aa5e1238bc4506a01f89898a3157157b2f6ae Mon Sep 17 00:00:00 2001
From: Pieter Smith
Date: Sun, 29 Dec 2024 13:27:39 +0100
Subject: [PATCH] use the LWN weekly publication date

Bulk conversion (unfortunately) is a frequent thing when catching up
after a holiday or crontab breakage. It is really annoying figuring out
the LWN weekly edition reading order if they all have the same title
based on the conversion date.

Falls back to the current date if the date cannot be parsed.
---
Review notes (text after the "---" line is not part of the commit message):
- temporary_locale(): removed the stray "[2]" subscript on the restoring
  setlocale() call; setlocale() returns the locale name as a string, so
  indexing it could raise IndexError for short names such as "C".
- parse_index(): also catch locale.Error, which setlocale() raises when
  the 'en_US' locale is not available, so the documented fallback to the
  current time applies instead of aborting the recipe.

 recipes/lwn_weekly.recipe | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/recipes/lwn_weekly.recipe b/recipes/lwn_weekly.recipe
index 1f73d07d24c7..fbdd2b43b7fc 100644
--- a/recipes/lwn_weekly.recipe
+++ b/recipes/lwn_weekly.recipe
@@ -10,8 +10,21 @@ lwn.net
 
 import re
 import sys
-
+from contextlib import contextmanager
+import locale
+from datetime import datetime
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.date import now as nowf
+
+
+@contextmanager
+def temporary_locale(temp_locale):
+    old_locale = locale.getlocale()
+    try:
+        locale.setlocale(locale.LC_ALL, temp_locale)
+        yield
+    finally:
+        locale.setlocale(locale.LC_ALL, old_locale)
 
 
 class WeeklyLWN(BasicNewsRecipe):
@@ -82,6 +95,9 @@ class WeeklyLWN(BasicNewsRecipe):
 
         return url
 
+    def publication_date(self):
+        return self.pub_date
+
     def parse_index(self):
         past_edition = self.recipe_specific_options.get('issue')
         if past_edition and isinstance(past_edition, str):
@@ -91,6 +107,15 @@ class WeeklyLWN(BasicNewsRecipe):
         else:
             index_url = self.print_version('/free/bigpage')
         soup = self.index_to_soup(index_url)
+
+        try:
+            with temporary_locale('en_US'):
+                date_match = re.match(r'.* +for +(.*) +\[.*', self.tag_to_string(soup.head.title.string))
+                self.pub_date = datetime.strptime(date_match[1], '%B %d, %Y')
+        except (TypeError, ValueError, locale.Error):
+            self.log.error('Failed to parse publication date from title: %r, using current time' % soup.head.title.string)
+            self.pub_date = nowf()
+
         curr = soup.body
 
         articles = {}