From 9a94c16e69a40950785813e77116953de87c4d29 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 4 Aug 2012 17:34:24 +0530 Subject: [PATCH 1/4] Update FHM UK --- recipes/fhm_uk.recipe | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/recipes/fhm_uk.recipe b/recipes/fhm_uk.recipe index 6ee5ae3fb6..84455ddd3c 100644 --- a/recipes/fhm_uk.recipe +++ b/recipes/fhm_uk.recipe @@ -18,15 +18,15 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): keep_only_tags = [ dict(name='h1'), dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}), - dict(name='div',attrs={'id' : ['articleLeft']}), - dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody']}), + dict(name='div',attrs={'id' : ['profileLeft','articleLeft','profileRight','profileBody']}), + dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody',]}), ] - #remove_tags = [ - #dict(attrs={'class' : ['player']}), + remove_tags = [ + dict(attrs={'id' : ['ctl00_Body_divSlideShow' ]}), - #] + ] feeds = [ (u'Homepage 1',u'http://feed43.com/6655867614547036.xml'), (u'Homepage 2',u'http://feed43.com/4167731873103110.xml'), @@ -34,7 +34,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): (u'Homepage 4',u'http://feed43.com/6550421522527341.xml'), (u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'), (u'Gaming',u'http://feed43.com/6537162612465672.xml'), - (u'Girls',u'http://feed43.com/3674777224513254.xml'), + (u'Girls',u'http://feed43.com/4574262733341068.xml'),# edit link http://feed43.com/feed.html?name=4574262733341068 ] extra_css = ''' From 6c781af4d78e13c5dee03432ac789fc8ed93778b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 4 Aug 2012 23:28:18 +0530 Subject: [PATCH 2/4] Sueddeutsche Mobil by Andreas Zeiser --- recipes/sueddeutsche_mobil.recipe | 117 ++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 recipes/sueddeutsche_mobil.recipe diff --git a/recipes/sueddeutsche_mobil.recipe b/recipes/sueddeutsche_mobil.recipe new file mode 100644 index 0000000000..d1b08cbcba --- /dev/null +++ b/recipes/sueddeutsche_mobil.recipe @@ -0,0 +1,117 @@ +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +__license__ = 'GPL v3' +__copyright__ = '2012, Andreas Zeiser ' +''' +szmobil.sueddeutsche.de/ +''' + +from calibre import strftime +from calibre.web.feeds.recipes import BasicNewsRecipe +import re + +class SZmobil(BasicNewsRecipe): + title = u'Süddeutsche Zeitung mobil' + __author__ = u'Andreas Zeiser' + description = u'Nachrichten aus Deutschland. Zugriff auf kostenpflichtiges Abo SZ mobil.' + publisher = u'Sueddeutsche Zeitung' + language = u'de' + publication_type = u'newspaper' + category = u'news, politics, Germany' + + no_stylesheets = True + oldest_article = 2 + encoding = 'iso-8859-1' + needs_subscription = True + remove_empty_feeds = True + delay = 1 + cover_source = 'http://www.sueddeutsche.de/verlag' + + timefmt = ' [%a, %d %b, %Y]' + + root_url ='http://szmobil.sueddeutsche.de/' + keep_only_tags = [dict(name='div', attrs={'class':'article'})] + + def get_cover_url(self): + src = self.index_to_soup(self.cover_source) + image_url = src.find(attrs={'class':'preview-image'}) + return image_url.div.img['src'] + + def get_browser(self): + browser = BasicNewsRecipe.get_browser(self) + + # Login via fetching of Streiflicht -> Fill out login request + url = self.root_url + 'show.php?id=streif' + browser.open(url) + + browser.select_form(nr=0) # to select the first form + browser['username'] = self.username + browser['password'] = self.password + browser.submit() + + return browser + + def parse_index(self): + # find all sections + src = self.index_to_soup('http://szmobil.sueddeutsche.de') + feeds = [] + for itt in src.findAll('a',href=True): + if itt['href'].startswith('show.php?section'): + feeds.append( (itt.string[0:-2],itt['href']) ) + + all_articles = [] + for feed in feeds: + feed_url = self.root_url + feed[1] + feed_title = feed[0] + + self.report_progress(0, ('Fetching feed')+' %s...'%(feed_title if feed_title else feed_url)) + + src = self.index_to_soup(feed_url) + articles = [] + shorttitles = dict() + for itt in src.findAll('a', href=True): + if itt['href'].startswith('show.php?id='): + article_url = itt['href'] + article_id = int(re.search("id=(\d*)&etag=", itt['href']).group(1)) + + # first check if link is a special article in section "Meinungsseite" + if itt.find('strong')!= None: + article_name = itt.strong.string + article_shorttitle = itt.contents[1] + + articles.append( (article_name, article_url, article_id) ) + shorttitles[article_id] = article_shorttitle + continue + + + # candidate for a general article + if itt.string == None: + article_name = '' + else: + article_name = itt.string + + if (article_name[0:10] == " mehr"): + # just another link ("mehr") to an article + continue + + if itt.has_key('id'): + shorttitles[article_id] = article_name + else: + articles.append( (article_name, article_url, article_id) ) + + feed_articles = [] + for article_name, article_url, article_id in articles: + url = self.root_url + article_url + title = article_name + pubdate = strftime('%a, %d %b') + description = '' + if shorttitles.has_key(article_id): + description = shorttitles[article_id] + # we do not want the flag ("Impressum") + if "HERAUSGEGEBEN VOM" in description: + continue + d = dict(title=title, url=url, date=pubdate, description=description, content='') + feed_articles.append(d) + all_articles.append( (feed_title, feed_articles) ) + + return all_articles + From 5aa627e5edfc825378b78b61116ddee49bc6781c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 5 Aug 2012 09:28:47 +0530 Subject: [PATCH 3/4] Update Metro UK --- recipes/metro_uk.recipe | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/recipes/metro_uk.recipe b/recipes/metro_uk.recipe index fa5d5c19c8..5b7b3a64ed 100644 --- a/recipes/metro_uk.recipe +++ b/recipes/metro_uk.recipe @@ -1,31 +1,42 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Metro UK' - description = 'News as provide by The Metro -UK' + description = 'Author Dave Asbury : News as provide by The Metro -UK' #timefmt = '' __author__ = 'Dave Asbury' - #last update 9/6/12 + #last update 4/8/12 cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg' - #no_stylesheets = True + no_stylesheets = True oldest_article = 1 - max_articles_per_feed = 10 + max_articles_per_feed = 12 remove_empty_feeds = True remove_javascript = True - auto_cleanup = True + #auto_cleanup = True encoding = 'UTF-8' - + cover_url ='http://profile.ak.fbcdn.net/hprofile-ak-snc4/157897_117118184990145_840702264_n.jpg' language = 'en_GB' masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif' + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:1.6em;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:1.2em;} + p{font-family:Arial,Helvetica,sans-serif;font-size:1.0em;} + body{font-family:Helvetica,Arial,sans-serif;font-size:1.0em;} + ''' keep_only_tags = [ - - ] + #dict(name='h1'), + #dict(name='h2'), + #dict(name='div', attrs={'class' : ['row','article','img-cnt figure','clrd']}) + #dict(name='h3'), + #dict(attrs={'class' : 'BText'}), + ] remove_tags = [ - + dict(name='span',attrs={'class' : 'share'}), + dict(name='li'), + dict(attrs={'class' : ['twitter-share-button','header-forms','hdr-lnks','close','art-rgt','fd-gr1-b clrd google-article','news m12 clrd clr-b p5t shareBtm','item-ds csl-3-img news','c-1of3 c-last','c-1of1','pd','item-ds csl-3-img sport']}), + dict(attrs={'id' : ['','sky-left','sky-right','ftr-nav','and-ftr','notificationList','logo','miniLogo','comments-news','metro_extras']}) ] - + remove_tags_before = dict(name='h1') + #remove_tags_after = dict(attrs={'id':['topic-buttons']}) feeds = [ (u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')] - extra_css = ''' - body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;} - ''' From b402a8ad2e431e5bb670623c62eb4135153da924 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 5 Aug 2012 09:47:51 +0530 Subject: [PATCH 4/4] eKundelek.pl by Artur Stachecki --- recipes/ekundelek_pl.recipe | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 recipes/ekundelek_pl.recipe diff --git a/recipes/ekundelek_pl.recipe b/recipes/ekundelek_pl.recipe new file mode 100644 index 0000000000..ebc5d39bbd --- /dev/null +++ b/recipes/ekundelek_pl.recipe @@ -0,0 +1,18 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2012, Artur Stachecki ' + +from calibre.web.feeds.news import BasicNewsRecipe + +class swiatczytnikow(BasicNewsRecipe): + title = u'eKundelek' + description = u'Najsympatyczniejszy blog o e-czytnikach Kindle' + language = 'pl' + __author__ = u'Artur Stachecki' + oldest_article = 7 + max_articles_per_feed = 100 + + remove_tags = [dict(name = 'div', attrs = {'class' : 'feedflare'})] + + feeds = [(u'Wpisy', u'http://feeds.feedburner.com/Ekundelekpl?format=xml')]