mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
931ee9867a
commit
cd126ea658
@ -22,7 +22,6 @@ def E(parent, name, text='', **attrs):
|
||||
parent.append(ans)
|
||||
return ans
|
||||
|
||||
|
||||
def process_node(node, html_parent):
|
||||
ntype = node.get('type')
|
||||
|
||||
@ -48,11 +47,9 @@ def ts_date(x):
|
||||
dt = datetime.fromtimestamp(x/1000 + time.timezone)
|
||||
return dt.strftime('%b %d, %Y at %I:%M %p')
|
||||
|
||||
|
||||
def auth(x):
|
||||
return ', '.join([a['name'] for a in x])
|
||||
|
||||
|
||||
def load_article_from_json(raw, root):
|
||||
# open('/t/raw.json', 'w').write(raw)
|
||||
data = json.loads(raw)['props']['pageProps']['payload']['data']['article']
|
||||
@ -103,6 +100,11 @@ class SCMP(BasicNewsRecipe):
|
||||
publication_type = "newspaper"
|
||||
ignore_duplicate_articles = {"title", "url"}
|
||||
extra_css = 'blockquote, em { color: #202020; }'
|
||||
masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/c/c3/SCMP_logo.svg'
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('https://www.frontpages.com/south-china-morning-post/')
|
||||
return 'https://www.frontpages.com' + soup.find('img', attrs={'id':'giornale-img'})['src']
|
||||
|
||||
recipe_specific_options = {
|
||||
'days': {
|
||||
@ -118,10 +120,6 @@ class SCMP(BasicNewsRecipe):
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('https://www.frontpages.com/south-china-morning-post/')
|
||||
return 'https://www.frontpages.com' + soup.find('img', attrs={'id':'giornale-img'})['src']
|
||||
|
||||
# used when unable to extract article from <script>, particularly in the Sports section
|
||||
remove_tags = [
|
||||
dict(
|
||||
@ -154,8 +152,6 @@ class SCMP(BasicNewsRecipe):
|
||||
def print_version(self, url):
|
||||
return url.split('?')[0]
|
||||
|
||||
masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/c/c3/SCMP_logo.svg'
|
||||
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
body = '<html><body><article></article></body></html>'
|
||||
b_root = parse(body)
|
||||
|
Loading…
x
Reference in New Issue
Block a user