Updated recipe for Linux Devices

This commit is contained in:
Kovid Goyal 2009-06-17 10:17:42 -07:00
parent 77bfb1c08d
commit f7542137d3

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Fetch Linuxdevices. Fetch Linuxdevices.
''' '''
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -18,6 +18,7 @@ class Sueddeutsche(BasicNewsRecipe):
max_articles_per_feed = 50 max_articles_per_feed = 50
no_stylesheets = True no_stylesheets = True
html2epub_options = 'linearize_tables = True\nbase_font_size2=14' html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
html2lrf_options = ['--ignore-tables']
encoding = 'latin1' encoding = 'latin1'
@ -76,3 +77,27 @@ class Sueddeutsche(BasicNewsRecipe):
feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]
def preprocess_html(self, soup):
    '''
    Strip unwanted markup from a downloaded page before conversion.

    Removals happen in this order, each via ``extract()``:

    1. every tag whose name matches the regex ``^a``
    2. whatever ``soup.findAll('b', text=...)`` yields for ``^Related``
    3. every tag whose name matches ``^li``, then ``^ul``
    4. the children of the first tag whose name matches ``^br``, if any
    5. up to ten ``br`` tags

    :param soup: parsed document exposing ``find`` and ``findAll``
    :return: the same ``soup`` object, modified in place
    '''
    # NOTE(review): these are prefix patterns, so '^a' also matches any
    # other tag name beginning with "a" (and '^li' any beginning with
    # "li") -- confirm that this breadth is intended.
    for item in soup.findAll(re.compile('^a')):
        item.extract()

    related = re.compile(r"^Related")
    for item in soup.findAll('b', text=related):
        item.extract()

    for item in soup.findAll(re.compile('^li')):
        item.extract()
    for item in soup.findAll(re.compile('^ul')):
        item.extract()

    # find() gives back a single match or None. Looping directly over
    # its result raised TypeError on pages with no matching tag, so the
    # None case is skipped explicitly.
    # NOTE(review): when a tag is found this loop walks that tag's
    # children, not further matches; if the goal was to drop every such
    # tag, findAll() is needed -- confirm intent before changing.
    first_br = soup.find(re.compile('^br'))
    if first_br is not None:
        for item in first_br:
            item.extract()

    for item in soup.findAll('br', limit=10):
        item.extract()
    return soup
def postprocess_html(self, soup, first):
    '''
    Rename every ``table``, ``tr`` and ``td`` tag in ``soup`` to ``div``.

    :param soup: parsed document exposing ``findAll``
    :param first: accepted for interface compatibility; not used here
    :return: the same ``soup`` object, modified in place
    '''
    table_tag_names = ['table', 'tr', 'td']
    matches = soup.findAll(name=table_tag_names)
    for element in matches:
        element.name = 'div'
    return soup