Updated recipe for Linux Devices

2025-07-09 03:04:10 -04:00 · 2009-06-17 10:17:42 -07:00 · 2009-06-17 10:17:42 -07:00 · f7542137d3
commit f7542137d3
parent 77bfb1c08d
1 changed file with 29 additions and 4 deletions
--- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py
+++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py
@@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Fetch Linuxdevices.
 '''
-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -18,6 +18,7 @@ class Sueddeutsche(BasicNewsRecipe):
    max_articles_per_feed = 50
    no_stylesheets = True
    html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
    html2lrf_options = ['--ignore-tables']
    encoding = 'latin1'
@@ -76,3 +77,27 @@ class Sueddeutsche(BasicNewsRecipe):
    feeds =  [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]
    def preprocess_html(self, soup):
        '''
        Remove unwanted elements from a parsed article, in place.

        The removal steps run in this order:

        1. everything ``soup.findAll`` returns for the name pattern ``^a``;
        2. everything ``soup.findAll('b', text=...)`` returns for text
           matching ``^Related``;
        3. everything returned for the name patterns ``^li`` and then ``^ul``;
        4. the children of the first element found for the name pattern
           ``^br``, if there is such an element;
        5. at most ten elements returned by ``soup.findAll('br', limit=10)``.

        Note that the name patterns are prefix regexes, so ``^a`` is not
        limited to ``<a>`` and ``^li`` is not limited to ``<li>``.

        :param soup: parsed document exposing ``find``/``findAll``
                     (presumably a BeautifulSoup tree -- confirm with caller).
        :return: the same ``soup`` object that was passed in.
        '''
        for item in soup.findAll(re.compile('^a')):
            item.extract()

        match = re.compile(r"^Related")
        for item in soup.findAll('b', text=match):
            item.extract()

        # List items are removed before their enclosing lists, as before.
        for name_pattern in ('^li', '^ul'):
            for item in soup.findAll(re.compile(name_pattern)):
                item.extract()

        # find() yields a single element or None, not a list of results.
        # Looping over None raised TypeError for every article without a
        # matching tag, so guard it. Iterate over a snapshot because
        # extract() detaches each child from the element being walked.
        # NOTE(review): the original may have meant findAll() here; the
        # existing "children of the first match" semantics are kept.
        first_br = soup.find(re.compile('^br'))
        if first_br is not None:
            for child in list(first_br):
                child.extract()

        for item in soup.findAll('br', limit=10):
            item.extract()
        return soup
    def postprocess_html(self, soup, first):
        '''
        Rename table markup to ``div`` elements, in place.

        Every element returned by ``soup.findAll`` for the names ``table``,
        ``tr`` and ``td`` has its ``name`` set to ``'div'``.

        :param soup: parsed document exposing ``findAll``.
        :param first: not used by this method.
        :return: the same ``soup`` object that was passed in.
        '''
        table_markup = ['table', 'tr', 'td']
        matches = soup.findAll(name=table_markup)
        for element in matches:
            element.name = 'div'
        return soup