From 1a361f694f4c4a82e1e0a3d42afc8cb4e4144fc8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 17 Jan 2012 18:12:45 +0530
Subject: [PATCH] Tweakers.net by Roedi06

---
 recipes/tweakers_net.recipe | 66 +++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 recipes/tweakers_net.recipe

diff --git a/recipes/tweakers_net.recipe b/recipes/tweakers_net.recipe
new file mode 100644
index 0000000000..f9bbe27ec9
--- /dev/null
+++ b/recipes/tweakers_net.recipe
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement  # no `with` statement appears in the recipe code visible here
+
+__license__ = 'GPL v3'
+__docformat__ = 'restructuredtext en'
+
+import re  # used only to compile the preprocess_regexps patterns below
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Tweakers(BasicNewsRecipe):  # recipe for the Tweakers.net feed: sets class attributes and overrides print_version
+    title = u'Tweakers.net - with Reactions'
+    __author__ = 'Roedi06'
+    language = 'nl'
+    oldest_article = 7  # presumably a number of days per calibre's recipe API -- confirm
+    max_articles_per_feed = 100
+    cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
+
+    keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),  # the div with class 'columnwrapper news'
+                      {'id':'reacties'},  # spec gives only an id ('reacties'), no tag name
+                      ]
+
+    remove_tags = [dict(name='div', attrs={'id' : ['utracker']}),  # only this first entry names a tag; the rest give a single attribute
+                   {'id' : ['channelNav']},
+                   {'id' : ['contentArea']},
+                   {'class' : ['breadCrumb']},
+                   {'class' : ['nextPrevious ellipsis']},
+                   {'class' : ['advertorial']},
+                   {'class' : ['sidebar']},
+                   {'class' : ['filterBox']},
+                   {'id' : ['toggleButtonTxt']},
+                   {'id' : ['socialButtons']},
+                   {'class' : ['button']},
+                   {'class' : ['textadTop']},
+                   {'class' : ['commentLink']},
+                   {'title' : ['Reageer op deze reactie']},  # matched on the title attribute rather than id/class
+                   {'class' : ['pageIndex']},
+                   {'class' : ['reactieHeader collapsed']},
+                   ]
+    no_stylesheets=True
+
+    preprocess_regexps = [  # (compiled pattern, replacement callable) pairs. NOTE(review): the pattern text below looks lost in extraction (presumably stripped markup) -- restore from the upstream recipe before use
+        (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''),  # NOTE(review): empty pattern and empty replacement as shown
+        (re.compile(r'

', re.IGNORECASE | re.DOTALL), lambda match : ''),  # NOTE(review): literal is split across lines here, which a single-quoted string does not allow; original pattern unknown
+        (re.compile(r'

', re.IGNORECASE | re.DOTALL), lambda match : ''),  # NOTE(review): same breakage as the previous entry
+        (re.compile(r''), lambda h1: ''),  # NOTE(review): empty pattern; no flags on this entry
+        (re.compile(r''), lambda h2: ''),  # NOTE(review): empty pattern; no flags on this entry
+        (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''),  # NOTE(review): empty pattern as shown
+        (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''),  # NOTE(review): empty pattern as shown
+        (re.compile(r'
.*?
'), lambda h1: ''),  # NOTE(review): only the non-greedy '.*?' survives between two line breaks; the surrounding text is presumably missing
+    ]
+
+    extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
+ .reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
+ .quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'  # one CSS string joined by backslash continuations; styles .reactieHeader, .reactieContent and .quote
+
+
+    feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]  # a single (title, feed URL) pair
+
+    def print_version(self, url):  # returns url with '?max=200' appended; self is unused
+        return url + '?max=200'  # plain concatenation: a query string already present in url is not accounted for
+