mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add date to articles in NYT Today's Paper index
This commit is contained in:
parent
a385f0a2d9
commit
4e730dc862
@ -4,8 +4,11 @@
|
|||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.utils.date import strptime
|
from calibre.utils.date import strptime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
is_web_edition = True
|
is_web_edition = True
|
||||||
# The sections to download when downloading the web edition, comment out
|
# The sections to download when downloading the web edition, comment out
|
||||||
@ -36,6 +39,20 @@ web_sections = [
|
|||||||
('Obituaries', 'obituaries'),
|
('Obituaries', 'obituaries'),
|
||||||
('Sunday Magazine', 'magazine')
|
('Sunday Magazine', 'magazine')
|
||||||
]
|
]
|
||||||
|
# Matches a /YYYY/MM/DD/ path segment (years 2000-2999) anywhere in a URL.
url_date_pat = re.compile(r'/(2\d\d\d)/(\d\d)/(\d\d)/')


def date_from_url(url):
    """Extract the date embedded in an article URL.

    Searches *url* for a ``/YYYY/MM/DD/`` path segment and returns it as a
    ``datetime.date``. Returns ``None`` when no such segment is present or
    when the digits do not form a real calendar date.
    """
    m = url_date_pat.search(url)
    if m is None:
        return None
    year, month, day = map(int, m.groups())
    try:
        return datetime.date(year, month, day)
    except ValueError:
        # The pattern accepts any two digits for month/day (e.g. /2018/13/45/),
        # so treat an impossible date the same as "no date in URL" instead of
        # letting the error propagate to the caller.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def format_date(d):
    """Format *d* as a bracketed suffix for an article title.

    *d* is any object with a ``strftime`` method (the visible caller passes a
    ``datetime.date``). Returns text such as ``' [Mon, 05 Mar 2018]'``; if
    producing or decoding that form fails, falls back to the purely numeric
    ``' [2018/03/05]'`` form.
    """
    def as_text(value):
        # strftime returns bytes on Python 2 and str on Python 3; only bytes
        # have (and need) a decode step.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    try:
        return as_text(d.strftime(' [%a, %d %b %Y]'))
    except Exception:
        # Deliberate best-effort: any failure with the named day/month form
        # (e.g. names that are not valid UTF-8) falls back to the numeric form.
        return as_text(d.strftime(' [%Y/%m/%d]'))
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
@ -71,7 +88,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
||||||
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
||||||
dict(id=lambda x: x and 'sharetools-' in x),
|
dict(id=lambda x: x and 'sharetools-' in x),
|
||||||
dict(id='newsletter-promo'.split()),
|
dict(id='newsletter-promo supported-by-ad'.split()),
|
||||||
classes('story-print-citation'),
|
classes('story-print-citation'),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -98,9 +115,14 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
s = p.find(**classes('summary'))
|
s = p.find(**classes('summary'))
|
||||||
if s is not None:
|
if s is not None:
|
||||||
desc = self.tag_to_string(s)
|
desc = self.tag_to_string(s)
|
||||||
self.log('\t', title, ': ', url)
|
date = ''
|
||||||
|
d = date_from_url(url)
|
||||||
|
if d is not None:
|
||||||
|
date = format_date(d)
|
||||||
|
|
||||||
|
self.log('\t', title + date, ': ', url)
|
||||||
self.log('\t\t', desc)
|
self.log('\t\t', desc)
|
||||||
yield {'title': title, 'url': url, 'description': desc}
|
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
||||||
|
|
||||||
def parse_todays_page(self):
|
def parse_todays_page(self):
|
||||||
soup = self.read_nyt_metadata()
|
soup = self.read_nyt_metadata()
|
||||||
|
@ -4,8 +4,11 @@
|
|||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.utils.date import strptime
|
from calibre.utils.date import strptime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
is_web_edition = False
|
is_web_edition = False
|
||||||
# The sections to download when downloading the web edition, comment out
|
# The sections to download when downloading the web edition, comment out
|
||||||
@ -36,6 +39,20 @@ web_sections = [
|
|||||||
('Obituaries', 'obituaries'),
|
('Obituaries', 'obituaries'),
|
||||||
('Sunday Magazine', 'magazine')
|
('Sunday Magazine', 'magazine')
|
||||||
]
|
]
|
||||||
|
# Matches a /YYYY/MM/DD/ path segment (years 2000-2999) anywhere in a URL.
url_date_pat = re.compile(r'/(2\d\d\d)/(\d\d)/(\d\d)/')


def date_from_url(url):
    """Extract the date embedded in an article URL.

    Searches *url* for a ``/YYYY/MM/DD/`` path segment and returns it as a
    ``datetime.date``. Returns ``None`` when no such segment is present or
    when the digits do not form a real calendar date.
    """
    m = url_date_pat.search(url)
    if m is None:
        return None
    year, month, day = map(int, m.groups())
    try:
        return datetime.date(year, month, day)
    except ValueError:
        # The pattern accepts any two digits for month/day (e.g. /2018/13/45/),
        # so treat an impossible date the same as "no date in URL" instead of
        # letting the error propagate to the caller.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def format_date(d):
    """Format *d* as a bracketed suffix for an article title.

    *d* is any object with a ``strftime`` method (the visible caller passes a
    ``datetime.date``). Returns text such as ``' [Mon, 05 Mar 2018]'``; if
    producing or decoding that form fails, falls back to the purely numeric
    ``' [2018/03/05]'`` form.
    """
    def as_text(value):
        # strftime returns bytes on Python 2 and str on Python 3; only bytes
        # have (and need) a decode step.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    try:
        return as_text(d.strftime(' [%a, %d %b %Y]'))
    except Exception:
        # Deliberate best-effort: any failure with the named day/month form
        # (e.g. names that are not valid UTF-8) falls back to the numeric form.
        return as_text(d.strftime(' [%Y/%m/%d]'))
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
@ -71,7 +88,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
||||||
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
||||||
dict(id=lambda x: x and 'sharetools-' in x),
|
dict(id=lambda x: x and 'sharetools-' in x),
|
||||||
dict(id='newsletter-promo'.split()),
|
dict(id='newsletter-promo supported-by-ad'.split()),
|
||||||
classes('story-print-citation'),
|
classes('story-print-citation'),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -98,9 +115,14 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
s = p.find(**classes('summary'))
|
s = p.find(**classes('summary'))
|
||||||
if s is not None:
|
if s is not None:
|
||||||
desc = self.tag_to_string(s)
|
desc = self.tag_to_string(s)
|
||||||
self.log('\t', title, ': ', url)
|
date = ''
|
||||||
|
d = date_from_url(url)
|
||||||
|
if d is not None:
|
||||||
|
date = format_date(d)
|
||||||
|
|
||||||
|
self.log('\t', title + date, ': ', url)
|
||||||
self.log('\t\t', desc)
|
self.log('\t\t', desc)
|
||||||
yield {'title': title, 'url': url, 'description': desc}
|
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
||||||
|
|
||||||
def parse_todays_page(self):
|
def parse_todays_page(self):
|
||||||
soup = self.read_nyt_metadata()
|
soup = self.read_nyt_metadata()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user