This commit is contained in:
Kovid Goyal 2020-12-22 14:08:50 +05:30
commit c6d9bce9e8
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 62 additions and 17 deletions

View File

@ -5,16 +5,20 @@ try:
from http.cookiejar import Cookie
except ImportError:
from cookielib import Cookie
import json
import json
from html5_parser import parse
from lxml import etree
from calibre import replace_entities
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.date import parse_only_date
from calibre.web.feeds.news import BasicNewsRecipe
# For past editions, set date to, for example, '2020-11-28'
edition_date = None
def E(parent, name, text='', **attrs):
ans = parent.makeelement(name, **attrs)
@ -94,7 +98,6 @@ class Economist(BasicNewsRecipe):
language = 'en'
__author__ = "Kovid Goyal"
INDEX = 'https://www.economist.com/printedition'
description = (
'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)'
@ -224,11 +227,21 @@ class Economist(BasicNewsRecipe):
article.summary = u'. '.join(result) + u'.'
article.text_summary = clean_ascii_chars(article.summary)
def publication_date(self):
    """Return the date to stamp on the generated issue.

    When the module-level ``edition_date`` override is set (a past
    edition such as '2020-11-28'), parse and return that date;
    otherwise defer to the base recipe's default behaviour.
    """
    if not edition_date:
        return BasicNewsRecipe.publication_date(self)
    return parse_only_date(edition_date, as_utc=False)
def parse_index(self):
# return [('Articles', [{'title':'test',
# 'url':'file:///t/raw.html'
# }])]
raw = self.index_to_soup(self.INDEX, raw=True)
if edition_date:
url = 'https://www.economist.com/weeklyedition/' + edition_date
self.timefmt = ' [' + edition_date + ']'
else:
url = 'https://www.economist.com/printedition'
raw = self.index_to_soup(url, raw=True)
# with open('/t/raw.html', 'wb') as f:
# f.write(raw)
soup = self.index_to_soup(raw)
@ -249,13 +262,21 @@ class Economist(BasicNewsRecipe):
return ans
def economist_parse_index(self, soup):
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
div = archive.find(attrs={'class': 'edition-teaser__image'})
if div is not None:
img = div.find('img', srcset=True)
img = None
if edition_date:
archive_url = "https://www.economist.com/weeklyedition/archive?year={}".format(edition_date[:4])
archive = self.index_to_soup(archive_url)
q = edition_date.replace('-', '')
q = '/print-covers/{}_'.format(q)
img = archive.find('img', srcset=lambda x: x and q in x)
else:
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
div = archive.find(attrs={'class': 'edition-teaser__image'})
if div is not None:
img = div.find('img', srcset=True)
if img:
self.cover_url = img['srcset'].split(',')[-1].split()[0]
self.log('Got cover:', self.cover_url)
feeds = []
for section in soup.findAll(**classes('layout-weekly-edition-section')):
h2 = section.find('h2')

View File

@ -5,16 +5,20 @@ try:
from http.cookiejar import Cookie
except ImportError:
from cookielib import Cookie
import json
import json
from html5_parser import parse
from lxml import etree
from calibre import replace_entities
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.date import parse_only_date
from calibre.web.feeds.news import BasicNewsRecipe
# For past editions, set date to, for example, '2020-11-28'
edition_date = None
def E(parent, name, text='', **attrs):
ans = parent.makeelement(name, **attrs)
@ -94,7 +98,6 @@ class Economist(BasicNewsRecipe):
language = 'en'
__author__ = "Kovid Goyal"
INDEX = 'https://www.economist.com/printedition'
description = (
'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)'
@ -224,11 +227,21 @@ class Economist(BasicNewsRecipe):
article.summary = u'. '.join(result) + u'.'
article.text_summary = clean_ascii_chars(article.summary)
def publication_date(self):
    # If a past edition was requested via the module-level edition_date
    # override, report that edition's date; otherwise fall back to the
    # default from BasicNewsRecipe (the download time).
    if edition_date:
        return parse_only_date(edition_date, as_utc=False)
    return BasicNewsRecipe.publication_date(self)
def parse_index(self):
# return [('Articles', [{'title':'test',
# 'url':'file:///t/raw.html'
# }])]
raw = self.index_to_soup(self.INDEX, raw=True)
if edition_date:
url = 'https://www.economist.com/weeklyedition/' + edition_date
self.timefmt = ' [' + edition_date + ']'
else:
url = 'https://www.economist.com/printedition'
raw = self.index_to_soup(url, raw=True)
# with open('/t/raw.html', 'wb') as f:
# f.write(raw)
soup = self.index_to_soup(raw)
@ -249,13 +262,21 @@ class Economist(BasicNewsRecipe):
return ans
def economist_parse_index(self, soup):
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
div = archive.find(attrs={'class': 'edition-teaser__image'})
if div is not None:
img = div.find('img', srcset=True)
img = None
if edition_date:
archive_url = "https://www.economist.com/weeklyedition/archive?year={}".format(edition_date[:4])
archive = self.index_to_soup(archive_url)
q = edition_date.replace('-', '')
q = '/print-covers/{}_'.format(q)
img = archive.find('img', srcset=lambda x: x and q in x)
else:
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
div = archive.find(attrs={'class': 'edition-teaser__image'})
if div is not None:
img = div.find('img', srcset=True)
if img:
self.cover_url = img['srcset'].split(',')[-1].split()[0]
self.log('Got cover:', self.cover_url)
feeds = []
for section in soup.findAll(**classes('layout-weekly-edition-section')):
h2 = section.find('h2')

View File

@ -1449,6 +1449,9 @@ class BasicNewsRecipe(Recipe):
def prepare_masthead_image(self, path_to_image, out_path):
    # Thin wrapper: delegates to the module-level prepare_masthead_image
    # helper, supplying this recipe's masthead dimensions.
    prepare_masthead_image(path_to_image, out_path, self.MI_WIDTH, self.MI_HEIGHT)
def publication_date(self):
    # Default publication date: now. Subclasses that can fetch past
    # editions (e.g. the Economist recipe in this commit) override this
    # to return the requested edition's date instead.
    return nowf()
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
@ -1477,7 +1480,7 @@ class BasicNewsRecipe(Recipe):
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
mi.pubdate = nowf()
mi.pubdate = self.publication_date()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')