Skip to content

Commit

Permalink
use the LWN weekly publication date
Browse files Browse the repository at this point in the history
Bulk conversion is (unfortunately) a frequent necessity when catching up
after a holiday or a crontab breakage. It is really annoying to figure out
the reading order of the LWN weekly editions when they all share the same
title, which is based on the conversion date.

Falls back to the current date if the date cannot be parsed.

Uses dateutil.parser.parse() for thread-safety.
  • Loading branch information
Pieter Smith committed Dec 30, 2024
1 parent bfbd83b commit c36ba84
Showing 1 changed file with 15 additions and 1 deletion.
16 changes: 15 additions & 1 deletion recipes/lwn_weekly.recipe
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ lwn.net

import re
import sys

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.date import now as nowf


class WeeklyLWN(BasicNewsRecipe):
Expand Down Expand Up @@ -82,6 +82,19 @@ class WeeklyLWN(BasicNewsRecipe):

return url

def publication_date(self):
    """Return the publication date stored in ``self.pub_date``.

    The attribute is assigned by ``parse_publication_date``.
    """
    edition_date = self.pub_date
    return edition_date

def parse_publication_date(self, soup):
    """Set ``self.pub_date`` from the edition date in the page title.

    The date is the text that follows the last `` for `` in the
    ``<title>`` string, up to an optional ``[`` suffix; for illustration,
    a title shaped like ``'... Weekly Edition for December 19, 2024 [...]'``
    yields ``'December 19, 2024 '``.

    If the page has no usable title, the title does not match, or the
    captured text is not a parseable date, a warning is logged and
    ``self.pub_date`` falls back to ``nowf()``.

    :param soup: parsed index page; ``soup.head.title.string`` is read.
    """
    # Only parse() is imported: ParserError is a ValueError subclass that
    # does not exist in older dateutil releases, so ValueError is caught
    # below instead.
    from dateutil.parser import parse

    title = None
    try:
        # AttributeError here means the page has no <head> or no <title>.
        title = soup.head.title.string
        date_match = re.match(r'.* +for +([^\[]*)', self.tag_to_string(title))
        # dateutil.parser.parse() is considered thread-safe
        # A failed match leaves date_match as None; subscripting it raises
        # TypeError, which is handled below.
        # NOTE(review): parse() returns a naive datetime when the text has
        # no timezone; confirm callers accept that alongside nowf().
        self.pub_date = parse(date_match[1])
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Log the already-fetched title rather than dereferencing soup
        # again, which could raise a second time inside the handler.
        self.log.warning('Failed to parse publication date from title: %r, using current time' % title)
        self.pub_date = nowf()

def parse_index(self):
past_edition = self.recipe_specific_options.get('issue')
if past_edition and isinstance(past_edition, str):
Expand All @@ -91,6 +104,7 @@ class WeeklyLWN(BasicNewsRecipe):
else:
index_url = self.print_version('/free/bigpage')
soup = self.index_to_soup(index_url)
self.parse_publication_date(soup)
curr = soup.body

articles = {}
Expand Down

0 comments on commit c36ba84

Please sign in to comment.