Merge from trunk

This commit is contained in:
Charles Haley 2011-07-11 11:07:57 +01:00
commit 56cdfeea77
3 changed files with 86 additions and 8 deletions

View File

@ -0,0 +1,39 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class autogids(BasicNewsRecipe):
title = u'Automatiseringgids IT'
oldest_article = 7
__author__ = 'DrMerry'
description = 'IT-nieuws van Automatiseringgids'
language = 'nl'
publisher = 'AutomatiseringGids'
category = 'Nieuws, IT, Nederlandstalig'
simultaneous_downloads = 5
#delay = 1
timefmt = ' [%A, %d %B, %Y]'
#timefmt = ''
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
publication_type = 'newspaper'
encoding = 'utf-8'
cover_url = 'http://www.automatiseringgids.nl/siteimg/header_logo.gif'
keep_only_tags = [dict(id=['content'])]
extra_css = '.artikelheader {font-size:0.8em; color: #666;} .artikelintro {font-weight:bold} div.imgArticle {float: right; margin: 0 0em 1em 1em; display: block; position: relative; } \
h2 { margin: 0 0 0.5em; min-height: 30px; font-size: 1.5em; letter-spacing: -0.2px; margin: 0 0 0.5em; color: black; font-weight: bold; line-height: 1.2em; padding: 4px 3px 0; }'
remove_tags = [dict(name='div', attrs={'id':['loginbox','reactiecollapsible','reactiebox']}),
dict(name='div', attrs={'class':['column_a','column_c','bannerfullsize','reactieheader','reactiecollapsible','formulier','artikel_headeroptions']}),
dict(name='ul', attrs={'class':['highlightlist']}),
dict(name='input', attrs={'type':['button']}),
dict(name='div', attrs={'style':['display:block; width:428px; height:30px; float:left;']}),
]
preprocess_regexps = [
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
feeds = [(u'Actueel', u'http://www.automatiseringgids.nl/rss.aspx')]

View File

@ -1,9 +1,9 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'DrMerry Based on v1.01 by Lorenzo Vigentini' __author__ = 'DrMerry Based on v1.01 by Lorenzo Vigentini'
__copyright__ = 'For version 1.02: DrMerry' __copyright__ = 'For version 1.02, 1.03: DrMerry'
__version__ = 'v1.02' __version__ = 'v1.03'
__date__ = '08, July 2011' __date__ = '11, July 2011'
__description__ = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day. Original version (c): 2009, Lorenzo Vigentini <l.vigentini at gmail.com>' __description__ = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day. Original version (c): 2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
''' '''
@ -11,9 +11,10 @@ http://www.computeractive.co.uk/
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class computeractive(BasicNewsRecipe): class computeractive(BasicNewsRecipe):
__author__ = 'Lorenzo Vigentini' __author__ = 'DrMerry'
description = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.' description = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif' cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif'
@ -31,6 +32,8 @@ class computeractive(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
remove_empty_feeds = True
remove_tags_after = dict(name='div', attrs={'class':'article_tags_block'})
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':'container_left'}) dict(name='div', attrs={'id':'container_left'})
@ -42,13 +45,14 @@ class computeractive(BasicNewsRecipe):
dict(name='a', attrs={'class':'largerImage'}) dict(name='a', attrs={'class':'largerImage'})
] ]
preprocess_regexps = [
(re.compile(r'(<a [^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
feeds = [ feeds = [
(u'General content', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'), (u'General content', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'),
(u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'), (u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'),
(u'Downloads', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/downloads'),
(u'Hardware', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/hardware'),
(u'Software', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/software'),
(u'Competitions', u'http://www.v3.co.uk/feeds/rss20/personal-technology/competitions')
] ]

35
recipes/geek_poke.recipe Normal file
View File

@ -0,0 +1,35 @@
#!/usr/bin/python
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1307556816(BasicNewsRecipe):
title = u'Geek and Poke'
__author__ = u'DrMerry'
description = u'Geek and Poke Cartoons'
oldest_article = 31
max_articles_per_feed = 100
language = u'en'
simultaneous_downloads = 5
#delay = 1
timefmt = ' [%A, %d %B, %Y]'
summary_length = -1
no_stylesheets = True
cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
remove_javascript = True
remove_empty_feeds = True
publication_type = 'blog'
preprocess_regexps = [ (re.compile(r'(<p>&nbsp;</p>|<iframe.*</iframe>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'(&nbsp;| )', re.DOTALL|re.IGNORECASE),lambda match: ' '),
(re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>')
]
extra_css = 'body, h3, p, h2, h1, div, span{margin:0px} h2.date-header {font-size: 0.7em; color:#eee;} h3.entry-header{font-size: 1.0em} div.entry-body{font-size: 0.9em}'
remove_tags_before = dict(name='h2', attrs={'class':'date-header'})
remove_tags_after = dict(name='div', attrs={'class':'entry-body'})
feeds = [(u'Geek and Poke', u'http://feeds.feedburner.com/GeekAndPoke?format=xml')]