mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
685fc41ce8
@ -208,17 +208,27 @@ class nytFeeds(BasicNewsRecipe):
|
|||||||
img { display:block; margin:0 auto; }
|
img { display:block; margin:0 auto; }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
# https://www.nytimes.com/rss
|
||||||
|
# https://developer.nytimes.com/docs/rss-api/1/overview
|
||||||
feeds = [
|
feeds = [
|
||||||
('World', 'https://rss.nytimes.com/services/xml/rss/nyt/World.xml'),
|
# to filter out all opinions from other sections first
|
||||||
('US', 'https://rss.nytimes.com/services/xml/rss/nyt/US.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/Opinion.xml',
|
||||||
('Business', 'https://rss.nytimes.com/services/xml/rss/nyt/Business.xml'),
|
|
||||||
('Technology', 'https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml',
|
||||||
('Science', 'https://rss.nytimes.com/services/xml/rss/nyt/Science.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/World.xml',
|
||||||
('Arts', 'https://rss.nytimes.com/services/xml/rss/nyt/Arts.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/US.xml',
|
||||||
('Fashion & Style', 'https://rss.nytimes.com/services/xml/rss/nyt/FashionandStyle.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/Business.xml',
|
||||||
('TMagazine', 'https://rss.nytimes.com/services/xml/rss/nyt/tmagazine.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/YourMoney.xml',
|
||||||
('Travel', 'https://www.nytimes.com/services/xml/rss/nyt/Travel.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml',
|
||||||
('Sunday Review', 'https://rss.nytimes.com/services/xml/rss/nyt/sunday-review.xml'),
|
'https://rss.nytimes.com/services/xml/rss/nyt/Science.xml',
|
||||||
|
'https://rss.nytimes.com/services/xml/rss/nyt/Climate.xml',
|
||||||
|
'https://rss.nytimes.com/services/xml/rss/nyt/Health.xml',
|
||||||
|
'https://rss.nytimes.com/services/xml/rss/nyt/Arts.xml',
|
||||||
|
'https://rss.nytimes.com/services/xml/rss/nyt/FashionandStyle.xml',
|
||||||
|
'https://rss.nytimes.com/services/xml/rss/nyt/tmagazine.xml',
|
||||||
|
'https://rss.nytimes.com/services/xml/rss/nyt/books.xml',
|
||||||
|
'https://www.nytimes.com/services/xml/rss/nyt/Travel.xml',
|
||||||
|
'http://nytimes.com/timeswire/feeds/'
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_browser(self, *args, **kwargs):
|
def get_browser(self, *args, **kwargs):
|
||||||
@ -231,6 +241,10 @@ class nytFeeds(BasicNewsRecipe):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
|
if '/interactive/' in url:
|
||||||
|
return '<html><body><p><em>'\
|
||||||
|
+ 'This is an interactive article, which is supposed to be read in a browser.'\
|
||||||
|
+ '</p></em></body></html>'
|
||||||
data = extract_json(raw_html)
|
data = extract_json(raw_html)
|
||||||
return '\n'.join(article_parse(data))
|
return '\n'.join(article_parse(data))
|
||||||
|
|
||||||
@ -239,9 +253,15 @@ class nytFeeds(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '-' + w
|
res = '-' + w
|
||||||
for img in soup.findAll('img', attrs={'src':True}):
|
for img in soup.findAll('img', attrs={'src':True}):
|
||||||
ext = img['src'].split('?')[0].split('.')[-1]
|
if '-article' in img['src']:
|
||||||
img['src'] = img['src'].rsplit('-article', 1)[0] + res + '.' + ext
|
ext = img['src'].split('?')[0].split('.')[-1]
|
||||||
|
img['src'] = img['src'].rsplit('-article', 1)[0] + res + '.' + ext
|
||||||
for c in soup.findAll('div', attrs={'class':'cap'}):
|
for c in soup.findAll('div', attrs={'class':'cap'}):
|
||||||
for p in c.findAll(['p', 'div']):
|
for p in c.findAll(['p', 'div']):
|
||||||
p.name = 'span'
|
p.name = 'span'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
url = BasicNewsRecipe.get_article_url(self, article)
|
||||||
|
if not re.search(r'/video/|live|/athletic/', url):
|
||||||
|
return url
|
||||||
|
Loading…
x
Reference in New Issue
Block a user