diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 8caee5a346..84ea237585 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -497,6 +497,10 @@ class BasicNewsRecipe(object, LoggingInterface): def _postprocess_html(self, soup, first_fetch, job_info): + if self.no_stylesheets: + for link in list(soup.findAll('link', type=re.compile('css')))+list(soup.findAll('style')): + link.extract() + head = soup.find('head') if not head: head = soup.find('body') @@ -513,9 +517,6 @@ class BasicNewsRecipe(object, LoggingInterface): url, __appname__, center=self.center_navbar) elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') body.insert(0, elem) - if self.no_stylesheets: - for link in list(soup.findAll('link', type=re.compile('css'))): - link.extract() if self.remove_javascript: for script in list(soup.findAll('script')): script.extract() diff --git a/src/calibre/web/feeds/recipes/nytimes.py b/src/calibre/web/feeds/recipes/nytimes.py index de431d6532..b771fb0fd3 100644 --- a/src/calibre/web/feeds/recipes/nytimes.py +++ b/src/calibre/web/feeds/recipes/nytimes.py @@ -17,12 +17,11 @@ class NYTimes(BasicNewsRecipe): description = 'Daily news from the New York Times' timefmt = ' [%a, %d %b, %Y]' needs_subscription = True - - remove_tags_before = dict(name='h1') - remove_tags_after = dict(id='footer') - remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool']}), - dict(id=['footer', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']), - dict(name=['script', 'noscript'])] + remove_tags_before = dict(id='article') + remove_tags_after = dict(id='article') + remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}), + dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']), + dict(name=['script', 'noscript', 'style'])] encoding = 'cp1252' no_stylesheets = True extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}' @@ -59,7 +58,7 @@ class NYTimes(BasicNewsRecipe): if not a: continue url = re.sub(r'\?.*', '', a['href']) - url += '?pagewanted=print' + url += '?pagewanted=all' title = self.tag_to_string(a, use_alt=True).strip() description = '' pubdate = strftime('%a, %d %b')