mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			98 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			98 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
__license__   = 'GPL v3'
 | 
						|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
						|
 | 
						|
'''
 | 
						|
Fetch Linuxdevices.
 | 
						|
'''
 | 
						|
import re
 | 
						|
from calibre.web.feeds.news import BasicNewsRecipe
 | 
						|
 | 
						|
 | 
						|
class LinuxDevices(BasicNewsRecipe):
    """Recipe for the 'Linuxdevices' feed at linuxfordevices.com.

    The class body is declarative configuration read by the
    ``BasicNewsRecipe`` base class; the two methods below tidy the parsed
    article markup.
    """

    # --- Identification -------------------------------------------------
    title = 'Linuxdevices'
    description = 'News about Linux driven Hardware'
    __author__ = 'Oliver Niesner'
    language = 'en'

    # --- Download / conversion settings ---------------------------------
    use_embedded_content = False
    timefmt = ' [%a %d %b %Y]'
    max_articles_per_feed = 50
    no_stylesheets = True
    remove_javascript = True
    conversion_options = {'linearize_tables': True}
    encoding = 'latin1'

    # --- Markup clean-up rules ------------------------------------------
    remove_tags_after = [{'id': 'intelliTxt'}]
    filter_regexps = [r'ad\.doubleclick\.net']

    # Each entry is a tag specification (tag name and/or attributes).
    # The order of the entries is kept exactly as originally written.
    remove_tags = [
        # Banners, footers and layout containers.
        {'name': 'div', 'attrs': {'class': 'bannerSuperBanner'}},
        {'name': 'div', 'attrs': {'class': 'bannerSky'}},
        {'name': 'div', 'attrs': {'border': '0'}},
        {'name': 'div', 'attrs': {'class': 'footerLinks'}},
        {'name': 'div', 'attrs': {'class': 'seitenanfang'}},
        {'name': 'td', 'attrs': {'class': 'mar5'}},
        {'name': 'table', 'attrs': {'class': 'pageAktiv'}},
        {'name': 'table', 'attrs': {'class': 'xartable'}},
        {'name': 'table', 'attrs': {'class': 'wpnavi'}},
        {'name': 'table', 'attrs': {'class': 'bgcontent absatz'}},
        {'name': 'table', 'attrs': {'class': 'footer'}},
        {'name': 'table', 'attrs': {'class': 'artikelBox'}},
        {'name': 'table', 'attrs': {'class': 'kommentare'}},
        {'name': 'table', 'attrs': {'class': 'pageBoxBot'}},
        # NOTE(review): an attribute literally named 'td' with the value
        # 'height="3"' looks like a mangled selector -- kept verbatim,
        # confirm the intended rule before changing it.
        {'name': 'table', 'attrs': {'td': 'height="3"'}},
        {'name': 'table', 'attrs': {'class': 'contentpaneopen'}},
        {'name': 'td', 'attrs': {'nowrap': 'nowrap'}},
        {'name': 'td', 'attrs': {'align': 'left'}},
        {'name': 'td', 'attrs': {'height': '5'}},
        {'name': 'td', 'attrs': {'class': 'ArticleWidgetsHeadline'}},
        {'name': 'div', 'attrs': {'class': 'artikelBox navigatorBox'}},
        {'name': 'div', 'attrs': {'class': 'similar-article-box'}},
        {'name': 'div', 'attrs': {'class': 'videoBigHack'}},
        {'name': 'td', 'attrs': {'class': 'artikelDruckenRight'}},
        # NOTE(review): a class value of 'width="200"' is probably meant
        # to be a width attribute -- kept verbatim, confirm before changing.
        {'name': 'td', 'attrs': {'class': 'width="200"'}},
        {'name': 'span', 'attrs': {'class': 'content_rating'}},
        # Specific links.
        {'name': 'a', 'attrs': {'href': 'http://www.addthis.com/bookmark.php'}},
        {'name': 'a', 'attrs': {'href': '/news'}},
        {'name': 'a', 'attrs': {'href': '/cgi-bin/survey/survey.cgi'}},
        {'name': 'a', 'attrs': {'href': '/cgi-bin/board/UltraBoard.pl'}},
        # Whole element types.
        {'name': 'iframe'},
        {'name': 'form'},
        {'name': 'span', 'attrs': {'class': 'hidePrint'}},
        # Elements addressed by id only.
        {'id': 'ArticleWidgets'},
        {'id': 'headerLBox'},
        {'id': 'nointelliTXT'},
        {'id': 'rechteSpalte'},
        {'id': 'newsticker-list-small'},
        {'id': 'ntop5'},
        {'id': 'ntop5send'},
        {'id': 'ntop5commented'},
        {'id': 'nnav-bgheader'},
        {'id': 'nnav-headerteaser'},
        {'id': 'nnav-head'},
        {'id': 'nnav-top'},
        {'id': 'readcomment'},
    ]

    # (feed title, feed URL) pairs.
    feeds = [('Linuxdevices', 'http://www.linuxfordevices.com/rss.xml')]

    def preprocess_html(self, soup):
        """Strip "Related" markers, list elements and leading line breaks.

        Three removal passes run one after another, each searching the
        tree as left by the previous pass:

        1. matches of ``findAll('b', text=<regex ^Related>)``,
        2. every tag whose name matches the regex ``^ul``,
        3. at most the first ten ``br`` tags still present.

        :param soup: parsed document; it is modified in place.
        :return: the same ``soup`` object.
        """
        def discard(nodes):
            # Detach every found node from the tree.
            for node in nodes:
                node.extract()

        # Each search is evaluated only after the previous pass finished,
        # so the ``br`` limit counts only tags that survived ``ul`` removal.
        discard(soup.findAll('b', text=re.compile(r"^Related")))
        discard(soup.findAll(re.compile('^ul')))
        discard(soup.findAll('br', limit=10))
        return soup

    def postprocess_html(self, soup, first):
        """Rename every ``table``, ``tr`` and ``td`` tag to ``div``.

        :param soup: parsed document; it is modified in place.
        :param first: accepted but not used by this method.
        :return: the same ``soup`` object.
        """
        table_parts = ['table', 'tr', 'td']
        for element in soup.findAll(name=table_parts):
            element.name = 'div'
        return soup
 | 
						|
 | 
						|
 |