Merge from trunk

This commit is contained in:
Charles Haley 2012-08-05 08:25:20 +02:00
commit b3e190a26d
6 changed files with 172 additions and 21 deletions

View File

@ -0,0 +1,18 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2012, Artur Stachecki <artur.stachecki@gmail.com>'
from calibre.web.feeds.news import BasicNewsRecipe
class swiatczytnikow(BasicNewsRecipe):
    """Recipe for the eKundelek blog (language code 'pl').

    All articles are taken from the single FeedBurner feed listed in
    ``feeds``.
    """

    # Recipe metadata.
    __author__ = u'Artur Stachecki'
    title = u'eKundelek'
    language = 'pl'
    description = u'Najsympatyczniejszy blog o e-czytnikach Kindle'

    # Fetch limits.
    max_articles_per_feed = 100
    oldest_article = 7

    # Drop every <div class="feedflare"> from the downloaded articles.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'feedflare'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Wpisy', u'http://feeds.feedburner.com/Ekundelekpl?format=xml'),
    ]

View File

@ -18,15 +18,15 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
keep_only_tags = [
dict(name='h1'),
dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}),
dict(name='div',attrs={'id' : ['articleLeft']}),
dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody']}),
dict(name='div',attrs={'id' : ['profileLeft','articleLeft','profileRight','profileBody']}),
dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody',]}),
]
#remove_tags = [
#dict(attrs={'class' : ['player']}),
remove_tags = [
dict(attrs={'id' : ['ctl00_Body_divSlideShow' ]}),
#]
]
feeds = [
(u'Homepage 1',u'http://feed43.com/6655867614547036.xml'),
(u'Homepage 2',u'http://feed43.com/4167731873103110.xml'),
@ -34,7 +34,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
(u'Homepage 4',u'http://feed43.com/6550421522527341.xml'),
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
(u'Gaming',u'http://feed43.com/6537162612465672.xml'),
(u'Girls',u'http://feed43.com/3674777224513254.xml'),
(u'Girls',u'http://feed43.com/4574262733341068.xml'),# edit link http://feed43.com/feed.html?name=4574262733341068
]
extra_css = '''

View File

@ -1,31 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
description = 'News as provide by The Metro -UK'
description = 'Author Dave Asbury : News as provide by The Metro -UK'
#timefmt = ''
__author__ = 'Dave Asbury'
#last update 9/6/12
#last update 4/8/12
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
#no_stylesheets = True
no_stylesheets = True
oldest_article = 1
max_articles_per_feed = 10
max_articles_per_feed = 12
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
#auto_cleanup = True
encoding = 'UTF-8'
cover_url ='http://profile.ak.fbcdn.net/hprofile-ak-snc4/157897_117118184990145_840702264_n.jpg'
language = 'en_GB'
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:1.6em;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:1.2em;}
p{font-family:Arial,Helvetica,sans-serif;font-size:1.0em;}
body{font-family:Helvetica,Arial,sans-serif;font-size:1.0em;}
'''
keep_only_tags = [
]
#dict(name='h1'),
#dict(name='h2'),
#dict(name='div', attrs={'class' : ['row','article','img-cnt figure','clrd']})
#dict(name='h3'),
#dict(attrs={'class' : 'BText'}),
]
remove_tags = [
dict(name='span',attrs={'class' : 'share'}),
dict(name='li'),
dict(attrs={'class' : ['twitter-share-button','header-forms','hdr-lnks','close','art-rgt','fd-gr1-b clrd google-article','news m12 clrd clr-b p5t shareBtm','item-ds csl-3-img news','c-1of3 c-last','c-1of1','pd','item-ds csl-3-img sport']}),
dict(attrs={'id' : ['','sky-left','sky-right','ftr-nav','and-ftr','notificationList','logo','miniLogo','comments-news','metro_extras']})
]
remove_tags_before = dict(name='h1')
#remove_tags_after = dict(attrs={'id':['topic-buttons']})
feeds = [
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''

View File

@ -0,0 +1,117 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Andreas Zeiser <andreas.zeiser@web.de>'
'''
szmobil.sueddeutsche.de/
'''
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class SZmobil(BasicNewsRecipe):
    """Recipe for the subscription edition served at szmobil.sueddeutsche.de.

    The recipe logs in with the user's credentials (``needs_subscription``),
    discovers the sections linked from the start page and builds one feed
    per section from the article links found on each section page.
    """

    title = u'Süddeutsche Zeitung mobil'
    __author__ = u'Andreas Zeiser'
    description = u'Nachrichten aus Deutschland. Zugriff auf kostenpflichtiges Abo SZ mobil.'
    publisher = u'Sueddeutsche Zeitung'
    language = u'de'
    publication_type = u'newspaper'
    category = u'news, politics, Germany'

    no_stylesheets = True
    oldest_article = 2
    encoding = 'iso-8859-1'
    needs_subscription = True
    remove_empty_feeds = True
    delay = 1
    # Page that is scraped for the cover image, see get_cover_url().
    cover_source = 'http://www.sueddeutsche.de/verlag'

    timefmt = ' [%a, %d %b, %Y]'

    # Prefix for the relative 'show.php?...' links found on the site.
    root_url ='http://szmobil.sueddeutsche.de/'
    keep_only_tags = [dict(name='div', attrs={'class':'article'})]

    def get_cover_url(self):
        """Return the ``src`` of the image nested inside the element with
        class ``preview-image`` on the ``cover_source`` page."""
        src = self.index_to_soup(self.cover_source)
        image_url = src.find(attrs={'class':'preview-image'})
        return image_url.div.img['src']

    def get_browser(self):
        """Return a browser on which the login form has been submitted.

        The login form is reached by requesting the 'streif' article; the
        first form on that page is filled with ``self.username`` and
        ``self.password`` and submitted.
        """
        browser = BasicNewsRecipe.get_browser(self)

        # Login via fetching of Streiflicht -> Fill out login request
        url = self.root_url + 'show.php?id=streif'
        browser.open(url)

        browser.select_form(nr=0)  # to select the first form
        browser['username'] = self.username
        browser['password'] = self.password
        browser.submit()

        return browser

    def parse_index(self):
        """Build the list of feeds from the site's section pages.

        Returns a list of ``(feed_title, articles)`` tuples, where
        ``articles`` is a list of dicts with the keys ``title``, ``url``,
        ``date``, ``description`` and ``content``.
        """
        # Raw string so that \d reaches the regex engine untouched; \d+
        # (instead of \d*) guarantees int() never receives an empty string.
        article_id_re = re.compile(r"id=(\d+)&etag=")

        # find all sections
        src = self.index_to_soup('http://szmobil.sueddeutsche.de')
        feeds = []
        for itt in src.findAll('a', href=True):
            if itt['href'].startswith('show.php?section'):
                # The last two characters of the link text are dropped
                # (presumably a trailing decoration -- TODO confirm).
                feeds.append((itt.string[0:-2], itt['href']))

        # The same date string is used for every article of this run.
        pubdate = strftime('%a, %d %b')

        all_articles = []
        for feed_title, feed_href in feeds:
            feed_url = self.root_url + feed_href

            self.report_progress(0, ('Fetching feed')+' %s...'%(feed_title if feed_title else feed_url))

            src = self.index_to_soup(feed_url)
            articles = []
            shorttitles = dict()
            for itt in src.findAll('a', href=True):
                article_url = itt['href']
                if not article_url.startswith('show.php?id='):
                    continue

                # Links without a numeric id followed by '&etag=' (the login
                # URL 'show.php?id=streif' has this shape) cannot be keyed;
                # skip them rather than failing on a missing match.
                id_match = article_id_re.search(article_url)
                if id_match is None:
                    continue
                article_id = int(id_match.group(1))

                # first check if link is a special article in section "Meinungsseite"
                if itt.find('strong') is not None:
                    article_name = itt.strong.string
                    article_shorttitle = itt.contents[1]

                    articles.append((article_name, article_url, article_id))
                    shorttitles[article_id] = article_shorttitle
                    continue

                # candidate for a general article
                if itt.string is None:
                    article_name = ''
                else:
                    article_name = itt.string

                if article_name.startswith("&nbsp;mehr"):
                    # just another link ("mehr") to an article
                    continue

                # Anchors carrying an 'id' attribute only contribute the
                # short title for that article id; anchors without one are
                # the article entries themselves.
                if itt.get('id') is not None:
                    shorttitles[article_id] = article_name
                else:
                    articles.append((article_name, article_url, article_id))

            feed_articles = []
            for article_name, article_url, article_id in articles:
                description = shorttitles.get(article_id, '')
                # we do not want the flag ("Impressum")
                if "HERAUSGEGEBEN VOM" in description:
                    continue
                feed_articles.append(dict(
                    title=article_name,
                    url=self.root_url + article_url,
                    date=pubdate,
                    description=description,
                    content='',
                ))
            all_articles.append((feed_title, feed_articles))

        return all_articles

View File

@ -49,13 +49,18 @@ class OpenFeedback(DeviceError):
raise NotImplementedError
class InitialConnectionError(OpenFeedback):
""" Errors detected during connection after detection but before open """
""" Errors detected during connection after detection but before open, for
e.g. in the is_connected() method. """
class OpenFailed(ProtocolError):
    """ Raised when device cannot be opened this time. No retry is to be done.
        The device should continue to be polled for future opens. If the
        message is empty, no exception trace is produced.

        The ``show_me`` attribute is True only when the message contains
        something other than whitespace. """

    def __init__(self, msg):
        ProtocolError.__init__(self, msg)
        # A missing, empty or whitespace-only message is not worth showing.
        if msg and msg.strip():
            self.show_me = True
        else:
            self.show_me = False
class DeviceBusy(ProtocolError):
""" Raised when device is busy """
def __init__(self, uerr=""):

View File

@ -252,7 +252,7 @@ class DeviceManager(Thread): # {{{
if DEBUG:
prints('Device connect failed again, giving up')
except OpenFailed as e:
if str(e):
if e.show_me:
traceback.print_exc()
# Mount devices that don't use USB, such as the folder device and iTunes