mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
32c2ac7fac
@ -2,7 +2,6 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from datetime import date
|
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
from html5_parser import parse
|
from html5_parser import parse
|
||||||
@ -32,14 +31,14 @@ class ft(BasicNewsRecipe):
|
|||||||
.o-topper__visual, #fig, .main-image, .n-content-image { text-align:center; font-size:small; }
|
.o-topper__visual, #fig, .main-image, .n-content-image { text-align:center; font-size:small; }
|
||||||
blockquote, i { color:#5c5c5c; }
|
blockquote, i { color:#5c5c5c; }
|
||||||
.o-topper__standfirst { font-style:italic; color:#202020; }
|
.o-topper__standfirst { font-style:italic; color:#202020; }
|
||||||
.o-topper__topic { font-size:small; color:#5c5c5c; }
|
.o-topper__topic, .article-info__time-byline-content { font-size:small; color:#5c5c5c; }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
recipe_specific_options = {
|
recipe_specific_options = {
|
||||||
'days': {
|
'days': {
|
||||||
'short': 'Oldest article to download from this news source. In days ',
|
'short': 'Oldest article to download from this news source. In days ',
|
||||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||||
'default': str(oldest_article)
|
'default': str(oldest_article),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,16 +50,19 @@ class ft(BasicNewsRecipe):
|
|||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes(
|
classes(
|
||||||
'body_json o-topper__topic o-topper__headline o-topper__standfirst o-topper__visual article-info__time-byline main-image'
|
'body_json o-topper__topic o-topper__headline o-topper__standfirst '
|
||||||
|
'article-info__time-byline-content o-topper__visual main-image'
|
||||||
),
|
),
|
||||||
dict(name='article', attrs={'id':'article-body'})
|
dict(name='article', attrs={'id': 'article-body'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['source', 'svg', 'button', 'aside']),
|
dict(name=['source', 'svg', 'button', 'aside']),
|
||||||
dict(name='aside', attrs={'class': 'n-content-recommended--single-story'}),
|
dict(name='aside', attrs={'class': 'n-content-recommended--single-story'}),
|
||||||
dict(attrs={'data-layout-name': 'card'}),
|
dict(attrs={'data-layout-name': 'card'}),
|
||||||
classes('in-article-advert flourish-disclaimer')
|
classes(
|
||||||
|
'in-article-advert flourish-disclaimer n-myft-ui__preferences-modal n-myft-ui n-myft-ui--follow'
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
@ -108,7 +110,7 @@ class ft(BasicNewsRecipe):
|
|||||||
('Climate', 'https://www.ft.com/climate-capital?format=rss'),
|
('Climate', 'https://www.ft.com/climate-capital?format=rss'),
|
||||||
('Life & Arts', 'https://www.ft.com/life-arts?format=rss'),
|
('Life & Arts', 'https://www.ft.com/life-arts?format=rss'),
|
||||||
('How to spend it', 'https://www.ft.com/htsi?format=rss'),
|
('How to spend it', 'https://www.ft.com/htsi?format=rss'),
|
||||||
('Others', 'https://www.ft.com/rss/home/uk')
|
('Others', 'https://www.ft.com/rss/home/uk'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, *a):
|
def preprocess_raw_html(self, raw, *a):
|
||||||
@ -159,13 +161,28 @@ class ft(BasicNewsRecipe):
|
|||||||
body = re.sub(r'\[https://\S+?\]', insert_image, body)
|
body = re.sub(r'\[https://\S+?\]', insert_image, body)
|
||||||
if data.get('description'):
|
if data.get('description'):
|
||||||
desc = '<h2>' + data['description'] + '</h2>'
|
desc = '<h2>' + data['description'] + '</h2>'
|
||||||
html = '<html><body><div class="body_json"><h1>' + title + '</h1>' + desc + '<h3>' + author + '</h3>' + image + '<p>' + body
|
html = (
|
||||||
|
'<html><body><div class="body_json"><h1>'
|
||||||
|
+ title
|
||||||
|
+ '</h1>'
|
||||||
|
+ desc
|
||||||
|
+ '<h3>'
|
||||||
|
+ author
|
||||||
|
+ '</h3>'
|
||||||
|
+ image
|
||||||
|
+ '<p>'
|
||||||
|
+ body
|
||||||
|
)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
p = soup.find(**classes('o-topper__standfirst'))
|
p = soup.find(**classes('o-topper__standfirst'))
|
||||||
if p:
|
if p:
|
||||||
p.name = 'p'
|
p.name = 'p'
|
||||||
|
div = soup.findAll(**classes('article-info__time-byline-content'))
|
||||||
|
for d in div:
|
||||||
|
if p_ := d.find('p'):
|
||||||
|
p_.name = 'div'
|
||||||
for table in soup.findAll('table'):
|
for table in soup.findAll('table'):
|
||||||
if len(table.find('tbody').findAll('tr')) > 20:
|
if len(table.find('tbody').findAll('tr')) > 20:
|
||||||
table.find('tbody').decompose()
|
table.find('tbody').decompose()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user