mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Merge branch 'patch-1' of https://github.com/shinozukayohei/calibre
This commit is contained in:
commit
c6d9bce9e8
@ -5,16 +5,20 @@ try:
|
|||||||
from http.cookiejar import Cookie
|
from http.cookiejar import Cookie
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from cookielib import Cookie
|
from cookielib import Cookie
|
||||||
import json
|
|
||||||
|
|
||||||
|
import json
|
||||||
from html5_parser import parse
|
from html5_parser import parse
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre import replace_entities
|
from calibre import replace_entities
|
||||||
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
from calibre.utils.date import parse_only_date
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
# For past editions, set date to, for example, '2020-11-28'
|
||||||
|
edition_date = None
|
||||||
|
|
||||||
|
|
||||||
def E(parent, name, text='', **attrs):
|
def E(parent, name, text='', **attrs):
|
||||||
ans = parent.makeelement(name, **attrs)
|
ans = parent.makeelement(name, **attrs)
|
||||||
@ -94,7 +98,6 @@ class Economist(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
__author__ = "Kovid Goyal"
|
__author__ = "Kovid Goyal"
|
||||||
INDEX = 'https://www.economist.com/printedition'
|
|
||||||
description = (
|
description = (
|
||||||
'Global news and current affairs from a European'
|
'Global news and current affairs from a European'
|
||||||
' perspective. Best downloaded on Friday mornings (GMT)'
|
' perspective. Best downloaded on Friday mornings (GMT)'
|
||||||
@ -224,11 +227,21 @@ class Economist(BasicNewsRecipe):
|
|||||||
article.summary = u'. '.join(result) + u'.'
|
article.summary = u'. '.join(result) + u'.'
|
||||||
article.text_summary = clean_ascii_chars(article.summary)
|
article.text_summary = clean_ascii_chars(article.summary)
|
||||||
|
|
||||||
|
def publication_date(self):
|
||||||
|
if edition_date:
|
||||||
|
return parse_only_date(edition_date, as_utc=False)
|
||||||
|
return BasicNewsRecipe.publication_date(self)
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
# return [('Articles', [{'title':'test',
|
# return [('Articles', [{'title':'test',
|
||||||
# 'url':'file:///t/raw.html'
|
# 'url':'file:///t/raw.html'
|
||||||
# }])]
|
# }])]
|
||||||
raw = self.index_to_soup(self.INDEX, raw=True)
|
if edition_date:
|
||||||
|
url = 'https://www.economist.com/weeklyedition/' + edition_date
|
||||||
|
self.timefmt = ' [' + edition_date + ']'
|
||||||
|
else:
|
||||||
|
url = 'https://www.economist.com/printedition'
|
||||||
|
raw = self.index_to_soup(url, raw=True)
|
||||||
# with open('/t/raw.html', 'wb') as f:
|
# with open('/t/raw.html', 'wb') as f:
|
||||||
# f.write(raw)
|
# f.write(raw)
|
||||||
soup = self.index_to_soup(raw)
|
soup = self.index_to_soup(raw)
|
||||||
@ -249,13 +262,21 @@ class Economist(BasicNewsRecipe):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def economist_parse_index(self, soup):
|
def economist_parse_index(self, soup):
|
||||||
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
|
img = None
|
||||||
div = archive.find(attrs={'class': 'edition-teaser__image'})
|
if edition_date:
|
||||||
if div is not None:
|
archive_url = "https://www.economist.com/weeklyedition/archive?year={}".format(edition_date[:4])
|
||||||
img = div.find('img', srcset=True)
|
archive = self.index_to_soup(archive_url)
|
||||||
|
q = edition_date.replace('-', '')
|
||||||
|
q = '/print-covers/{}_'.format(q)
|
||||||
|
img = archive.find('img', srcset=lambda x: x and q in x)
|
||||||
|
else:
|
||||||
|
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
|
||||||
|
div = archive.find(attrs={'class': 'edition-teaser__image'})
|
||||||
|
if div is not None:
|
||||||
|
img = div.find('img', srcset=True)
|
||||||
|
if img:
|
||||||
self.cover_url = img['srcset'].split(',')[-1].split()[0]
|
self.cover_url = img['srcset'].split(',')[-1].split()[0]
|
||||||
self.log('Got cover:', self.cover_url)
|
self.log('Got cover:', self.cover_url)
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
for section in soup.findAll(**classes('layout-weekly-edition-section')):
|
for section in soup.findAll(**classes('layout-weekly-edition-section')):
|
||||||
h2 = section.find('h2')
|
h2 = section.find('h2')
|
||||||
|
@ -5,16 +5,20 @@ try:
|
|||||||
from http.cookiejar import Cookie
|
from http.cookiejar import Cookie
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from cookielib import Cookie
|
from cookielib import Cookie
|
||||||
import json
|
|
||||||
|
|
||||||
|
import json
|
||||||
from html5_parser import parse
|
from html5_parser import parse
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre import replace_entities
|
from calibre import replace_entities
|
||||||
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
from calibre.utils.date import parse_only_date
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
# For past editions, set date to, for example, '2020-11-28'
|
||||||
|
edition_date = None
|
||||||
|
|
||||||
|
|
||||||
def E(parent, name, text='', **attrs):
|
def E(parent, name, text='', **attrs):
|
||||||
ans = parent.makeelement(name, **attrs)
|
ans = parent.makeelement(name, **attrs)
|
||||||
@ -94,7 +98,6 @@ class Economist(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
__author__ = "Kovid Goyal"
|
__author__ = "Kovid Goyal"
|
||||||
INDEX = 'https://www.economist.com/printedition'
|
|
||||||
description = (
|
description = (
|
||||||
'Global news and current affairs from a European'
|
'Global news and current affairs from a European'
|
||||||
' perspective. Best downloaded on Friday mornings (GMT)'
|
' perspective. Best downloaded on Friday mornings (GMT)'
|
||||||
@ -224,11 +227,21 @@ class Economist(BasicNewsRecipe):
|
|||||||
article.summary = u'. '.join(result) + u'.'
|
article.summary = u'. '.join(result) + u'.'
|
||||||
article.text_summary = clean_ascii_chars(article.summary)
|
article.text_summary = clean_ascii_chars(article.summary)
|
||||||
|
|
||||||
|
def publication_date(self):
|
||||||
|
if edition_date:
|
||||||
|
return parse_only_date(edition_date, as_utc=False)
|
||||||
|
return BasicNewsRecipe.publication_date(self)
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
# return [('Articles', [{'title':'test',
|
# return [('Articles', [{'title':'test',
|
||||||
# 'url':'file:///t/raw.html'
|
# 'url':'file:///t/raw.html'
|
||||||
# }])]
|
# }])]
|
||||||
raw = self.index_to_soup(self.INDEX, raw=True)
|
if edition_date:
|
||||||
|
url = 'https://www.economist.com/weeklyedition/' + edition_date
|
||||||
|
self.timefmt = ' [' + edition_date + ']'
|
||||||
|
else:
|
||||||
|
url = 'https://www.economist.com/printedition'
|
||||||
|
raw = self.index_to_soup(url, raw=True)
|
||||||
# with open('/t/raw.html', 'wb') as f:
|
# with open('/t/raw.html', 'wb') as f:
|
||||||
# f.write(raw)
|
# f.write(raw)
|
||||||
soup = self.index_to_soup(raw)
|
soup = self.index_to_soup(raw)
|
||||||
@ -249,13 +262,21 @@ class Economist(BasicNewsRecipe):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def economist_parse_index(self, soup):
|
def economist_parse_index(self, soup):
|
||||||
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
|
img = None
|
||||||
div = archive.find(attrs={'class': 'edition-teaser__image'})
|
if edition_date:
|
||||||
if div is not None:
|
archive_url = "https://www.economist.com/weeklyedition/archive?year={}".format(edition_date[:4])
|
||||||
img = div.find('img', srcset=True)
|
archive = self.index_to_soup(archive_url)
|
||||||
|
q = edition_date.replace('-', '')
|
||||||
|
q = '/print-covers/{}_'.format(q)
|
||||||
|
img = archive.find('img', srcset=lambda x: x and q in x)
|
||||||
|
else:
|
||||||
|
archive = self.index_to_soup("https://www.economist.com/weeklyedition/archive")
|
||||||
|
div = archive.find(attrs={'class': 'edition-teaser__image'})
|
||||||
|
if div is not None:
|
||||||
|
img = div.find('img', srcset=True)
|
||||||
|
if img:
|
||||||
self.cover_url = img['srcset'].split(',')[-1].split()[0]
|
self.cover_url = img['srcset'].split(',')[-1].split()[0]
|
||||||
self.log('Got cover:', self.cover_url)
|
self.log('Got cover:', self.cover_url)
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
for section in soup.findAll(**classes('layout-weekly-edition-section')):
|
for section in soup.findAll(**classes('layout-weekly-edition-section')):
|
||||||
h2 = section.find('h2')
|
h2 = section.find('h2')
|
||||||
|
@ -1449,6 +1449,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
def prepare_masthead_image(self, path_to_image, out_path):
|
def prepare_masthead_image(self, path_to_image, out_path):
|
||||||
prepare_masthead_image(path_to_image, out_path, self.MI_WIDTH, self.MI_HEIGHT)
|
prepare_masthead_image(path_to_image, out_path, self.MI_WIDTH, self.MI_HEIGHT)
|
||||||
|
|
||||||
|
def publication_date(self):
|
||||||
|
return nowf()
|
||||||
|
|
||||||
def create_opf(self, feeds, dir=None):
|
def create_opf(self, feeds, dir=None):
|
||||||
if dir is None:
|
if dir is None:
|
||||||
dir = self.output_dir
|
dir = self.output_dir
|
||||||
@ -1477,7 +1480,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
language = canonicalize_lang(self.language)
|
language = canonicalize_lang(self.language)
|
||||||
if language is not None:
|
if language is not None:
|
||||||
mi.language = language
|
mi.language = language
|
||||||
mi.pubdate = nowf()
|
mi.pubdate = self.publication_date()
|
||||||
opf_path = os.path.join(dir, 'index.opf')
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
ncx_path = os.path.join(dir, 'index.ncx')
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user