mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
98 lines
4.0 KiB
Plaintext
98 lines
4.0 KiB
Plaintext
__license__ = 'GPL v3'
|
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
|
|
'''
|
|
Fetch Linuxdevices.
|
|
'''
|
|
import re
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class LinuxDevices(BasicNewsRecipe):
    """Recipe that fetches articles from the Linuxdevices RSS feed.

    The class body only configures the base recipe (metadata, clean-up
    selectors and the feed list); the two hooks below massage the parsed
    HTML before and after the base class processes it.
    """

    # --- Metadata ---------------------------------------------------------
    title = u'Linuxdevices'
    __author__ = 'Oliver Niesner'
    description = 'News about Linux driven Hardware'
    language = 'en'
    timefmt = ' [%a %d %b %Y]'

    # --- Download / conversion settings -----------------------------------
    encoding = 'latin1'
    max_articles_per_feed = 50
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    conversion_options = {'linearize_tables': True}

    # --- Page clean-up selectors ------------------------------------------
    filter_regexps = [r'ad\.doubleclick\.net']
    remove_tags_after = [{'id': 'intelliTxt'}]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'bannerSuperBanner'}},
        {'name': 'div', 'attrs': {'class': 'bannerSky'}},
        {'name': 'div', 'attrs': {'border': '0'}},
        {'name': 'div', 'attrs': {'class': 'footerLinks'}},
        {'name': 'div', 'attrs': {'class': 'seitenanfang'}},
        {'name': 'td', 'attrs': {'class': 'mar5'}},
        {'name': 'table', 'attrs': {'class': 'pageAktiv'}},
        {'name': 'table', 'attrs': {'class': 'xartable'}},
        {'name': 'table', 'attrs': {'class': 'wpnavi'}},
        {'name': 'table', 'attrs': {'class': 'bgcontent absatz'}},
        {'name': 'table', 'attrs': {'class': 'footer'}},
        {'name': 'table', 'attrs': {'class': 'artikelBox'}},
        {'name': 'table', 'attrs': {'class': 'kommentare'}},
        {'name': 'table', 'attrs': {'class': 'pageBoxBot'}},
        # NOTE(review): an attribute called 'td' whose value is 'height="3"'
        # looks like a typo; kept unchanged -- confirm the intended selector.
        {'name': 'table', 'attrs': {'td': 'height="3"'}},
        {'name': 'table', 'attrs': {'class': 'contentpaneopen'}},
        {'name': 'td', 'attrs': {'nowrap': 'nowrap'}},
        {'name': 'td', 'attrs': {'align': 'left'}},
        {'name': 'td', 'attrs': {'height': '5'}},
        {'name': 'td', 'attrs': {'class': 'ArticleWidgetsHeadline'}},
        {'name': 'div', 'attrs': {'class': 'artikelBox navigatorBox'}},
        {'name': 'div', 'attrs': {'class': 'similar-article-box'}},
        {'name': 'div', 'attrs': {'class': 'videoBigHack'}},
        {'name': 'td', 'attrs': {'class': 'artikelDruckenRight'}},
        # NOTE(review): a class value of 'width="200"' looks like a typo
        # (possibly meant attrs={'width': '200'}); kept unchanged -- confirm.
        {'name': 'td', 'attrs': {'class': 'width="200"'}},
        {'name': 'span', 'attrs': {'class': 'content_rating'}},
        {'name': 'a', 'attrs': {'href': 'http://www.addthis.com/bookmark.php'}},
        {'name': 'a', 'attrs': {'href': '/news'}},
        {'name': 'a', 'attrs': {'href': '/cgi-bin/survey/survey.cgi'}},
        {'name': 'a', 'attrs': {'href': '/cgi-bin/board/UltraBoard.pl'}},
        {'name': 'iframe'},
        {'name': 'form'},
        {'name': 'span', 'attrs': {'class': 'hidePrint'}},
        {'id': 'ArticleWidgets'},
        {'id': 'headerLBox'},
        {'id': 'nointelliTXT'},
        {'id': 'rechteSpalte'},
        {'id': 'newsticker-list-small'},
        {'id': 'ntop5'},
        {'id': 'ntop5send'},
        {'id': 'ntop5commented'},
        {'id': 'nnav-bgheader'},
        {'id': 'nnav-headerteaser'},
        {'id': 'nnav-head'},
        {'id': 'nnav-top'},
        {'id': 'readcomment'},
    ]

    # --- Feeds --------------------------------------------------------------
    feeds = [(u'Linuxdevices', u'http://www.linuxfordevices.com/rss.xml')]

    def preprocess_html(self, soup):
        """Strip "Related" markers, lists and leading line breaks.

        Three passes run in this fixed order, each searching the soup as
        left by the previous pass:

        1. ``<b>`` matches whose text starts with ``Related``;
        2. every tag whose name matches ``^ul``;
        3. at most the first ten ``<br>`` tags.

        Returns the same ``soup`` object, modified in place.
        """
        def discard(nodes):
            # Detach every found node from the document tree.
            for node in nodes:
                node.extract()

        discard(soup.findAll('b', text=re.compile(r"^Related")))
        discard(soup.findAll(re.compile('^ul')))
        discard(soup.findAll('br', limit=10))
        return soup

    def postprocess_html(self, soup, first):
        """Rename every ``table``/``tr``/``td`` tag to ``div``.

        ``first`` is accepted for the hook signature but not used here.
        Returns the same ``soup`` object, modified in place.
        """
        for table_part in soup.findAll(name=['table', 'tr', 'td']):
            table_part.name = 'div'
        return soup
|
|
|
|
|