Skip to content

Commit

Permalink
use the LWN weekly publication date
Browse files Browse the repository at this point in the history
Bulk conversion is (unfortunately) a frequent occurrence when catching up
after a holiday or a crontab breakage. It is really annoying to work out
the reading order of the LWN weekly editions when they all share the same
title, which is derived from the conversion date.

Falls back to the current date if the date cannot be parsed.
  • Loading branch information
Pieter Smith committed Dec 30, 2024
1 parent bfbd83b commit 661aa5e
Showing 1 changed file with 26 additions and 1 deletion.
27 changes: 26 additions & 1 deletion recipes/lwn_weekly.recipe
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,21 @@ lwn.net

import re
import sys

from contextlib import contextmanager
import locale
from datetime import datetime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.date import now as nowf


@contextmanager
def temporary_locale(temp_locale):
    """Switch ``LC_ALL`` to *temp_locale* for the duration of a ``with`` block.

    The locale settings in effect on entry are restored on exit, including
    when the body of the ``with`` block raises.

    Args:
        temp_locale: Locale name handed to ``locale.setlocale``,
            e.g. ``'en_US'``.

    Raises:
        locale.Error: If ``locale.setlocale`` rejects *temp_locale*.
    """
    # Calling setlocale() without a locale argument only queries the current
    # setting. Unlike getlocale(), which reports LC_CTYPE alone, the returned
    # string describes LC_ALL, so the restore below puts back every category.
    old_locale = locale.setlocale(locale.LC_ALL)
    try:
        locale.setlocale(locale.LC_ALL, temp_locale)
        yield
    finally:
        # Do not subscript the return value: setlocale() returns the new
        # locale string, and indexing a short one such as 'C' raises
        # IndexError while leaving the with block.
        locale.setlocale(locale.LC_ALL, old_locale)


class WeeklyLWN(BasicNewsRecipe):
Expand Down Expand Up @@ -82,6 +95,9 @@ class WeeklyLWN(BasicNewsRecipe):

return url

def publication_date(self):
    """Return the edition date stored on this recipe as ``pub_date``.

    ``parse_index`` assigns ``self.pub_date`` (the date parsed from the index
    page title, or the current time when parsing fails), so this accessor
    raises ``AttributeError`` if it is called before that assignment.
    """
    # NOTE(review): presumably overrides a BasicNewsRecipe hook - confirm.
    edition_date = self.pub_date
    return edition_date

def parse_index(self):
past_edition = self.recipe_specific_options.get('issue')
if past_edition and isinstance(past_edition, str):
Expand All @@ -91,6 +107,15 @@ class WeeklyLWN(BasicNewsRecipe):
else:
index_url = self.print_version('/free/bigpage')
soup = self.index_to_soup(index_url)

try:
with temporary_locale('en_US'):
date_match = re.match(r'.* +for +(.*) +\[.*', self.tag_to_string(soup.head.title.string))
self.pub_date = datetime.strptime(date_match[1], '%B %d, %Y')
except (TypeError, ValueError):
self.log.error('Failed to parse publication date from title: %r, using current time' % soup.head.title.string)
self.pub_date = nowf()

curr = soup.body

articles = {}
Expand Down

0 comments on commit 661aa5e

Please sign in to comment.