#!/usr/bin/env python # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai from __future__ import with_statement __license__ = 'GPL v3' __docformat__ = 'restructuredtext en' import re from calibre.web.feeds.news import BasicNewsRecipe class Tweakers(BasicNewsRecipe): title = u'Tweakers.net - with Reactions' __author__ = 'Roedi06' language = 'nl' oldest_article = 7 max_articles_per_feed = 100 cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif' keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}), {'id':'reacties'}, ] remove_tags = [dict(name='div', attrs={'id' : ['utracker']}), {'id' : ['channelNav']}, {'id' : ['contentArea']}, {'class' : ['breadCrumb']}, {'class' : ['nextPrevious ellipsis']}, {'class' : ['advertorial']}, {'class' : ['sidebar']}, {'class' : ['filterBox']}, {'id' : ['toggleButtonTxt']}, {'id' : ['socialButtons']}, {'class' : ['button']}, {'class' : ['textadTop']}, {'class' : ['commentLink']}, {'title' : ['Reageer op deze reactie']}, {'class' : ['pageIndex']}, {'class' : ['reactieHeader collapsed']}, ] no_stylesheets=True preprocess_regexps = [ (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''), (re.compile(r'

', re.IGNORECASE | re.DOTALL), lambda match : ''), (re.compile(r'

', re.IGNORECASE | re.DOTALL), lambda match : ''), (re.compile(r''), lambda h1: ''), (re.compile(r''), lambda h2: ''), (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''), (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''), (re.compile(r'
.*?
'), lambda h1: ''), ] extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \ .reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \ .quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }' feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')] def print_version(self, url): return url + '?max=200'