From f7542137d3dfc4209ac9f705feecf4e441405ed6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 17 Jun 2009 10:17:42 -0700 Subject: [PATCH] Updated recipe for Linux Devices --- .../web/feeds/recipes/recipe_linuxdevices.py | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py index cd914e96ad..f3006e427e 100644 --- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py +++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py @@ -4,12 +4,12 @@ __copyright__ = '2008, Kovid Goyal ' ''' Fetch Linuxdevices. ''' - +import re from calibre.web.feeds.news import BasicNewsRecipe class Sueddeutsche(BasicNewsRecipe): - + title = u'Linuxdevices' description = 'News about Linux driven Hardware' __author__ = 'Oliver Niesner' @@ -18,6 +18,7 @@ class Sueddeutsche(BasicNewsRecipe): max_articles_per_feed = 50 no_stylesheets = True html2epub_options = 'linearize_tables = True\nbase_font_size2=14' + html2lrf_options = ['--ignore-tables'] encoding = 'latin1' @@ -71,8 +72,32 @@ class Sueddeutsche(BasicNewsRecipe): dict(id='nnav-logo'), dict(id='nnav-oly'), dict(id='readcomment')] - - feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] + + feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] + + def preprocess_html(self, soup): + for item in soup.findAll(re.compile('^a')): + item.extract() + match = re.compile(r"^Related") + for item in soup.findAll('b', text=match): + item.extract() + for item in soup.findAll(re.compile('^li')): + item.extract() + for item in soup.findAll(re.compile('^ul')): + item.extract() + for item in soup.find(re.compile('^br')): + item.extract() + for item in soup.findAll('br', limit=10): + item.extract() + return soup + + + def postprocess_html(self, soup, first): + for tag in soup.findAll(name=['table', 'tr', 'td']): + tag.name = 'div' + return soup + +