mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1224 (no_stylesheets didn't get rid of all original CSS references) and improve recipe for the nytimes
This commit is contained in:
parent
a3fa08360c
commit
f8ee3e0c4e
@ -497,6 +497,10 @@ class BasicNewsRecipe(object, LoggingInterface):
|
||||
|
||||
|
||||
def _postprocess_html(self, soup, first_fetch, job_info):
|
||||
if self.no_stylesheets:
|
||||
for link in list(soup.findAll('link', type=re.compile('css')))+list(soup.findAll('style')):
|
||||
link.extract()
|
||||
|
||||
head = soup.find('head')
|
||||
if not head:
|
||||
head = soup.find('body')
|
||||
@ -513,9 +517,6 @@ class BasicNewsRecipe(object, LoggingInterface):
|
||||
url, __appname__, center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(0, elem)
|
||||
if self.no_stylesheets:
|
||||
for link in list(soup.findAll('link', type=re.compile('css'))):
|
||||
link.extract()
|
||||
if self.remove_javascript:
|
||||
for script in list(soup.findAll('script')):
|
||||
script.extract()
|
||||
|
@ -17,12 +17,11 @@ class NYTimes(BasicNewsRecipe):
|
||||
description = 'Daily news from the New York Times'
|
||||
timefmt = ' [%a, %d %b, %Y]'
|
||||
needs_subscription = True
|
||||
|
||||
remove_tags_before = dict(name='h1')
|
||||
remove_tags_after = dict(id='footer')
|
||||
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool']}),
|
||||
dict(id=['footer', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
|
||||
dict(name=['script', 'noscript'])]
|
||||
remove_tags_before = dict(id='article')
|
||||
remove_tags_after = dict(id='article')
|
||||
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
|
||||
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
|
||||
dict(name=['script', 'noscript', 'style'])]
|
||||
encoding = 'cp1252'
|
||||
no_stylesheets = True
|
||||
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
||||
@ -59,7 +58,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
if not a:
|
||||
continue
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
url += '?pagewanted=print'
|
||||
url += '?pagewanted=all'
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
description = ''
|
||||
pubdate = strftime('%a, %d %b')
|
||||
|
Loading…
x
Reference in New Issue
Block a user