From 2b826c4974152c707f6b3a54aa9310710f6769a1 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 9 Apr 2012 01:02:09 +0200 Subject: [PATCH 01/15] Improved Read It Later recipe, uses API to get articles feed, and new "Article View" data to get enhanced article content (with images) --- recipes/readitlater.recipe | 171 +++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 75 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 38f7ec1a9a..08196d3a3d 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,36 +1,39 @@ -""" +''' readitlaterlist.com -""" +''' __license__ = 'GPL v3' __copyright__ = ''' -2010, Darko Miletic -2011, Przemyslaw Kryger -2012, tBunnyMan +2011, Keith Callenberg +2012, Alayn Gortazar ''' -from calibre import strftime +from contextlib import closing from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag +import json +import urllib +import urllib2 - -class Readitlater(BasicNewsRecipe): - title = 'ReadItLater' - __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' - description = '''Personalized news feeds. Go to readitlaterlist.com to setup \ - up your news. This version displays pages of articles from \ - oldest to newest, with max & minimum counts, and marks articles \ - read after downloading.''' +class Readitlaterv2(BasicNewsRecipe): + title = 'Read It Later v2' + __author__ = 'Keith Callenberg' + description = '''Personalized news feeds. Go to readitlaterlist.com to + setup up your news. Fill in your account + username, and optionally you can add your password.''' publisher = 'readitlaterlist.com' category = 'news, custom' oldest_article = 7 - max_articles_per_feed = 50 - minimum_articles = 1 + max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False needs_subscription = True - INDEX = u'http://readitlaterlist.com' + KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' + INDEX = 'https://readitlaterlist.com/' LOGIN = INDEX + u'/l' - readList = [] + articles = [] + + feeds = [(u'Unread articles' , INDEX)] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -43,66 +46,84 @@ class Readitlater(BasicNewsRecipe): br.submit() return br - def get_feeds(self): - self.report_progress(0, ('Fetching list of pages...')) - lfeeds = [] - i = 1 - feedurl = self.INDEX + u'/unread/1' - while True: - title = u'Unread articles, page ' + str(i) - lfeeds.insert(0, (title, feedurl)) - self.report_progress(0, ('Got ') + str(i) + (' pages')) - i += 1 - soup = self.index_to_soup(feedurl) - ritem = soup.find('a', attrs={'id':'next', 'class':'active'}) - if ritem is None: - break - feedurl = self.INDEX + ritem['href'] - return lfeeds + def parse_index(self): - totalfeeds = [] - articlesToGrab = self.max_articles_per_feed - lfeeds = self.get_feeds() - for feedobj in lfeeds: - if articlesToGrab < 1: - break - feedtitle, feedurl = feedobj - self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - ritem = soup.find('ul', attrs={'id':'list'}) - for item in reversed(ritem.findAll('li')): - if articlesToGrab < 1: - break - else: - articlesToGrab -= 1 - description = '' - atag = item.find('a', attrs={'class':'text'}) - if atag and atag.has_key('href'): - url = self.INDEX + atag['href'] - title = self.tag_to_string(item.div) - date = strftime(self.timefmt) - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':description - 
}) - readLink = item.find('a', attrs={'class':'check'})['href'] - self.readList.append(readLink) - totalfeeds.append((feedtitle, articles)) - if len(self.readList) < self.minimum_articles: - raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") - return totalfeeds + index = self.INDEX + 'v2/get?' + index += 'apikey=' + self.KEY + index += '&username=' + self.username + '&password=' + self.password + index += '&state=unread' + index += '&count=' + str(self.max_articles_per_feed) - def mark_as_read(self, markList): - br = self.get_browser() - for link in markList: - url = self.INDEX + link - response = br.open(url) - response + open_func = getattr(self.browser, 'open_novisit', self.browser.open) + with closing(open_func(index)) as f: + results = f.read() + if not results: + raise RuntimeError('Could not fetch index!') + + json_obj = json.loads(results) + + if len(json_obj['list']) > 0: + for item in json_obj['list'].iteritems(): + dataurl = "https://readitlaterlist.com/a/x/getArticle.php?itemId=" + item[1]['item_id'] + self.articles.append({ + 'title':item[1]['title'], + 'date':item[1]['time_added'], + 'url':dataurl, + 'description':item[1]['item_id'], + 'real_url':item[1]['url'] + }) + return [('Unread', self.articles)] + + def preprocess_raw_html(self, raw_html, url): + # get article and image urls from json object + json_obj = json.loads(raw_html) + self.images = {} + for image in json_obj['article']['images']: + self.images[image] = json_obj['article']['images'][image]['src'] + return json_obj['article']['article'] + + def preprocess_html(self, soup): + # Insert images on RIL_IMG_# divs + for key, url in self.images.iteritems(): + tag = Tag(soup, 'img') + tag['src'] = url + div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) + div.insert(0, tag) + return soup def cleanup(self): - self.mark_as_read(self.readList) + # From a list of urls, create a human-readable JSON string + # suitable for passing to the ReadItLater SEND::READ method. + + self.markAsRead(self.createMarkList(self.articles)) + + def createMarkList(self, articles): + urls = [] + for article in self.articles: + urls.append(article['real_url']) + items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)] + s = '{\n %s\n}' % (',\n '.join(items),) + return s + + def markAsRead(self, markList): + url = self.INDEX + 'v2/send' + values = { + 'username' : self.username, + 'password' : self.password, + 'apikey' : self.KEY, + 'read' : markList + } + data = urllib.urlencode(values) + + try: + print 'Calling ReadItLater API...' 
+ request = urllib2.Request(url,data) + response = urllib2.urlopen(request) + the_page = response.read() + print 'response =', response.code + except urllib2.HTTPError as e: + print 'The server could not fulfill the request: ', e + except urllib2.URLError as e: + print 'The call to ReadItLater API failed:', e From b81deec83a040ab2645cd14017e69f92edc60410 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 16 Apr 2012 23:05:06 +0200 Subject: [PATCH 02/15] Added title to each article and minimum_recipes support --- recipes/readitlater.recipe | 42 +++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 08196d3a3d..53061dd72a 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -3,7 +3,10 @@ readitlaterlist.com ''' __license__ = 'GPL v3' __copyright__ = ''' +2010, Darko Miletic +2011, Przemyslaw Kryger 2011, Keith Callenberg +2012, tBunnyMan 2012, Alayn Gortazar ''' @@ -14,16 +17,17 @@ import json import urllib import urllib2 -class Readitlaterv2(BasicNewsRecipe): - title = 'Read It Later v2' - __author__ = 'Keith Callenberg' +class Readitlater(BasicNewsRecipe): + title = 'Read It Later' + __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan, Alayn Gortazar' description = '''Personalized news feeds. Go to readitlaterlist.com to setup up your news. Fill in your account username, and optionally you can add your password.''' publisher = 'readitlaterlist.com' category = 'news, custom' oldest_article = 7 - max_articles_per_feed = 100 + max_articles_per_feed = 50 + minimum_articles = 1 no_stylesheets = True use_embedded_content = False needs_subscription = True @@ -51,7 +55,10 @@ class Readitlaterv2(BasicNewsRecipe): def parse_index(self): index = self.INDEX + 'v2/get?' index += 'apikey=' + self.KEY - index += '&username=' + self.username + '&password=' + self.password + if self.username is not None: + index += '&username=' + self.username + if self.password is not None: + index += '&password=' + self.password index += '&state=unread' index += '&count=' + str(self.max_articles_per_feed) @@ -62,10 +69,12 @@ class Readitlaterv2(BasicNewsRecipe): raise RuntimeError('Could not fetch index!') json_obj = json.loads(results) - - if len(json_obj['list']) > 0: + + if len(json_obj['list']) >= self.minimum_articles: for item in json_obj['list'].iteritems(): - dataurl = "https://readitlaterlist.com/a/x/getArticle.php?itemId=" + item[1]['item_id'] + # TODO: This URL should be modified by it's corresponding API call in a future. + # Actually is not possible to get the Article View potential throught an API call (12/04/2012) + dataurl = self.INDEX + "a/x/getArticle.php?itemId=" + item[1]['item_id'] self.articles.append({ 'title':item[1]['title'], 'date':item[1]['time_added'], @@ -73,6 +82,9 @@ class Readitlaterv2(BasicNewsRecipe): 'description':item[1]['item_id'], 'real_url':item[1]['url'] }) + else: + raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") + return [('Unread', self.articles)] def preprocess_raw_html(self, raw_html, url): @@ -81,23 +93,25 @@ class Readitlaterv2(BasicNewsRecipe): self.images = {} for image in json_obj['article']['images']: self.images[image] = json_obj['article']['images'][image]['src'] - return json_obj['article']['article'] + title = '
<h1>{title}</h1>
'.format(title=json_obj['article']['title']) + link = '
<a href="{url}">Original: {url}</a>
'.format(url=json_obj['article']['resolvedUrl']) + return link + title + json_obj['article']['article'] def preprocess_html(self, soup): # Insert images on RIL_IMG_# divs for key, url in self.images.iteritems(): - tag = Tag(soup, 'img') - tag['src'] = url + imgtag = Tag(soup, 'img') + imgtag['src'] = url div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) - div.insert(0, tag) + div.insert(0, imgtag) return soup def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. - self.markAsRead(self.createMarkList(self.articles)) - + #self.markAsRead(self.createMarkList(self.articles)) + return def createMarkList(self, articles): urls = [] From 56aec322cd7aca25ff550b532a1019d12d6cafeb Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Tue, 17 Apr 2012 22:38:46 +0200 Subject: [PATCH 03/15] Added horizontal line between articles --- recipes/readitlater.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 53061dd72a..5e425b8b5f 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -95,7 +95,7 @@ class Readitlater(BasicNewsRecipe): self.images[image] = json_obj['article']['images'][image]['src'] title = '
<h1>{title}</h1>
'.format(title=json_obj['article']['title']) link = '
<a href="{url}">Original: {url}</a>
'.format(url=json_obj['article']['resolvedUrl']) - return link + title + json_obj['article']['article'] + return link + title + json_obj['article']['article'] + '<hr>
' def preprocess_html(self, soup): # Insert images on RIL_IMG_# divs From 2a2ae6bb1403ba96999cb142e90f89c7f1606777 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Tue, 17 Apr 2012 22:40:30 +0200 Subject: [PATCH 04/15] Added recipe for Berria --- recipes/berria.recipe | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 recipes/berria.recipe diff --git a/recipes/berria.recipe b/recipes/berria.recipe new file mode 100644 index 0000000000..240682231e --- /dev/null +++ b/recipes/berria.recipe @@ -0,0 +1,37 @@ +__license__ = 'GPL v3' +__copyright__ = '2012, Alayn Gortazar ' +''' +www.berria.info +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Berria(BasicNewsRecipe): + title = 'Berria' + __author__ = 'Alayn Gortazar' + description = 'Euskal Herriko euskarazko egunkaria' + publisher = 'Berria' + category = 'news, politics, Basque Country' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'eu' + remove_empty_feeds = True + masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png' + + keep_only_tags = [ + dict(id='goiburua') + ,dict(name='div', attrs={'class':'testua' }) + ] + remove_tags = [ + dict(name='a', attrs={'class':'iruzkinak'}) + ] + + feeds = [ + (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') +# ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) +# ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) +# ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) +# ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) + ] From adf67292fb4641ed4ad10d21348fe6dfb749ce0b Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Tue, 17 Apr 2012 22:42:11 +0200 Subject: [PATCH 05/15] Mark downloaded articles as read --- recipes/readitlater.recipe | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 5e425b8b5f..50c0cc27eb 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -110,8 +110,7 @@ class Readitlater(BasicNewsRecipe): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. 
- #self.markAsRead(self.createMarkList(self.articles)) - return + self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): urls = [] From 7bcb500a4766119cae3bea01fb58a77b202c9fd2 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 00:47:00 +0200 Subject: [PATCH 06/15] Improved Berria recipe visualization --- recipes/berria.recipe | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 240682231e..9d5bfe1585 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -22,16 +22,20 @@ class Berria(BasicNewsRecipe): keep_only_tags = [ dict(id='goiburua') + ,dict(name='div', attrs={'class':'burukoak'}) ,dict(name='div', attrs={'class':'testua' }) + ,dict(name='div', attrs={'class':'ber_ikus' }) ] remove_tags = [ dict(name='a', attrs={'class':'iruzkinak'}) ] + + extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .titularra{font-size: x-large} .sarrera{font-weight: bold}' feeds = [ (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') -# ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) -# ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) -# ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) -# ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) + ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) + ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) + ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) + ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) ] From 1111868a36c66e58ba7b02a06876fd0139dd0d8e Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 13:20:27 +0200 Subject: [PATCH 07/15] Improved Berria recipe styles --- recipes/berria.recipe | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 9d5bfe1585..d987e1224b 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -22,15 +22,15 @@ class Berria(BasicNewsRecipe): keep_only_tags = [ dict(id='goiburua') - ,dict(name='div', attrs={'class':'burukoak'}) - ,dict(name='div', attrs={'class':'testua' }) - ,dict(name='div', attrs={'class':'ber_ikus' }) + ,dict(name='div', attrs={'class':['ber_ikus']}) + ,dict(name='section', attrs={'class':'ber_ikus' }) ] remove_tags = [ dict(name='a', attrs={'class':'iruzkinak'}) + ,dict(name='div', attrs={'class':'laguntzaileak'}) ] - extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .titularra{font-size: x-large} .sarrera{font-weight: bold}' + extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}' feeds = [ (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') From dda955e67c15baec96482d1e17fe79057b6a27dd Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 13:49:25 +0200 Subject: [PATCH 08/15] Added correct feed url's to Berria recipe --- recipes/berria.recipe | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index d987e1224b..6d2b5e05ec 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -34,8 +34,10 @@ class Berria(BasicNewsRecipe): feeds = [ (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') - ,(u'Paperezko edizioa', u'http://berria.info/rss/berria.xml' ) ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) + ,(u'Euskal Herria' , 
u'http://berria.info/rss/euskalherria.xml' ) + ,(u'Ekonomia' , u'http://berria.info/rss/ekonomia.xml' ) + ,(u'Mundua' , u'http://berria.info/rss/mundua.xml' ) ,(u'Kirola' , u'http://berria.info/rss/kirola.xml' ) ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) ] From f9817538923c9d929d3da6193187b46f470d6f85 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Wed, 18 Apr 2012 23:06:32 +0200 Subject: [PATCH 09/15] Migrating to getpocket.com --- recipes/readitlater.recipe | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 50c0cc27eb..ec0b9c83b7 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -32,7 +32,7 @@ class Readitlater(BasicNewsRecipe): use_embedded_content = False needs_subscription = True KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' - INDEX = 'https://readitlaterlist.com/' + INDEX = 'https://getpocket.com/' LOGIN = INDEX + u'/l' articles = [] @@ -109,7 +109,6 @@ class Readitlater(BasicNewsRecipe): def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. - self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): From de81f45215f18feb3c98338e8abd8a1f90535379 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Thu, 19 Apr 2012 01:25:40 +0200 Subject: [PATCH 10/15] Added "Enhanced version" option to read it later recipe --- recipes/readitlater.recipe | 72 +++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index ec0b9c83b7..c9d39e9082 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -32,33 +32,38 @@ class Readitlater(BasicNewsRecipe): use_embedded_content = False needs_subscription = True KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' + API_TEXT_INDEX = 'https://text.readitlaterlist.com/' + API_INDEX = 'https://readitlaterlist.com/' INDEX = 'https://getpocket.com/' LOGIN = INDEX + u'/l' + enhanced_version = True - articles = [] + articles = [] feeds = [(u'Unread articles' , INDEX)] def get_browser(self): br = BasicNewsRecipe.get_browser() - if self.username is not None: - br.open(self.LOGIN) - br.select_form(nr=0) - br['feed_id'] = self.username - if self.password is not None: - br['password'] = self.password - br.submit() + if self.enhanced_version: + if self.username is not None: + br.open(self.LOGIN) + br.select_form(nr=0) + br['feed_id'] = self.username + if self.password is not None: + br['password'] = self.password + br.submit() return br - + def get_auth_params(self): + auth_params = 'apikey=' + self.KEY + if self.username is not None: + auth_params += '&username=' + self.username + if self.password is not None: + auth_params += '&password=' + self.password + return auth_params def parse_index(self): - index = self.INDEX + 'v2/get?' - index += 'apikey=' + self.KEY - if self.username is not None: - index += '&username=' + self.username - if self.password is not None: - index += '&password=' + self.password + index = self.API_INDEX + 'v2/get?' + self.get_auth_params() index += '&state=unread' index += '&count=' + str(self.max_articles_per_feed) @@ -74,7 +79,11 @@ class Readitlater(BasicNewsRecipe): for item in json_obj['list'].iteritems(): # TODO: This URL should be modified by it's corresponding API call in a future. 
# Actually is not possible to get the Article View potential throught an API call (12/04/2012) - dataurl = self.INDEX + "a/x/getArticle.php?itemId=" + item[1]['item_id'] + if self.enhanced_version: + dataurl = self.INDEX + 'a/x/getArticle.php?itemId=' + item[1]['item_id'] + else: + dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params() + dataurl += '&url=' + item[1]['url'] self.articles.append({ 'title':item[1]['title'], 'date':item[1]['time_added'], @@ -89,21 +98,26 @@ class Readitlater(BasicNewsRecipe): def preprocess_raw_html(self, raw_html, url): # get article and image urls from json object - json_obj = json.loads(raw_html) - self.images = {} - for image in json_obj['article']['images']: - self.images[image] = json_obj['article']['images'][image]['src'] - title = '
<h1>{title}</h1>
'.format(title=json_obj['article']['title']) - link = '
<a href="{url}">Original: {url}</a>
'.format(url=json_obj['article']['resolvedUrl']) - return link + title + json_obj['article']['article'] + '<hr>
' + if self.enhanced_version: + json_obj = json.loads(raw_html) + self.images = {} + for image in json_obj['article']['images']: + self.images[image] = json_obj['article']['images'][image]['src'] + title = '
<h1>{title}</h1>
'.format(title=json_obj['article']['title']) + link = '
<a href="{url}">Original: {url}</a>
'.format(url=json_obj['article']['resolvedUrl']) + html = link + title + json_obj['article']['article'] + else: + html = raw_html + return html + '<hr>
' def preprocess_html(self, soup): # Insert images on RIL_IMG_# divs - for key, url in self.images.iteritems(): - imgtag = Tag(soup, 'img') - imgtag['src'] = url - div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) - div.insert(0, imgtag) + if self.enhanced_version: + for key, url in self.images.iteritems(): + imgtag = Tag(soup, 'img') + imgtag['src'] = url + div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) + div.insert(0, imgtag) return soup def cleanup(self): @@ -120,7 +134,7 @@ class Readitlater(BasicNewsRecipe): return s def markAsRead(self, markList): - url = self.INDEX + 'v2/send' + url = self.API_INDEX + 'v2/send' values = { 'username' : self.username, 'password' : self.password, From 43ada84eef994851a79e80fe00bb5a43408fd043 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 00:48:28 +0200 Subject: [PATCH 11/15] Oldest to newest order. Added time to cover --- recipes/readitlater.recipe | 43 ++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index c9d39e9082..9cda772354 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -10,9 +10,11 @@ __copyright__ = ''' 2012, Alayn Gortazar ''' +from operator import itemgetter from contextlib import closing from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag +from calibre import strftime import json import urllib import urllib2 @@ -31,16 +33,16 @@ class Readitlater(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False needs_subscription = True + mark_as_read_after_dl = False + enhanced_version = True + KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' - API_TEXT_INDEX = 'https://text.readitlaterlist.com/' + API_TEXT_INDEX = 'https://text.readitlaterlist.com/' API_INDEX = 'https://readitlaterlist.com/' INDEX = 'https://getpocket.com/' LOGIN = INDEX + u'/l' - enhanced_version = True articles = [] - - feeds = [(u'Unread articles' , INDEX)] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -63,9 +65,10 @@ class Readitlater(BasicNewsRecipe): return auth_params def parse_index(self): - index = self.API_INDEX + 'v2/get?' + self.get_auth_params() + index = self.API_INDEX + 'v3/get?' + self.get_auth_params() index += '&state=unread' index += '&count=' + str(self.max_articles_per_feed) + index += '&sort=oldest' open_func = getattr(self.browser, 'open_novisit', self.browser.open) with closing(open_func(index)) as f: @@ -85,15 +88,17 @@ class Readitlater(BasicNewsRecipe): dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params() dataurl += '&url=' + item[1]['url'] self.articles.append({ - 'title':item[1]['title'], + 'title':item[1]['resolved_title'], 'date':item[1]['time_added'], 'url':dataurl, 'description':item[1]['item_id'], - 'real_url':item[1]['url'] + 'sort_id':int(item[1]['sort_id']), + 'real_url':item[1]['given_url'] }) else: raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") - + + self.articles = sorted(self.articles, key=itemgetter('sort_id')) return [('Unread', self.articles)] def preprocess_raw_html(self, raw_html, url): @@ -123,7 +128,8 @@ class Readitlater(BasicNewsRecipe): def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. 
- self.markAsRead(self.createMarkList(self.articles)) + if self.mark_as_read_after_dl: + self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): urls = [] @@ -153,3 +159,22 @@ class Readitlater(BasicNewsRecipe): print 'The server could not fulfill the request: ', e except urllib2.URLError as e: print 'The call to ReadItLater API failed:', e + + def default_cover(self, cover_file): + ''' + Create a generic cover for recipes that don't have a cover + This override adds time to the cover + ''' + try: + from calibre.ebooks import calibre_cover + title = self.title if isinstance(self.title, unicode) else \ + self.title.decode(preferred_encoding, 'replace') + date = strftime(self.timefmt) + time = strftime('[%I:%M %p]') + img_data = calibre_cover(title, date, time) + cover_file.write(img_data) + cover_file.flush() + except: + self.log.exception('Failed to generate default cover') + return False + return True From 6185fa15528f487366fd9f48d1d9f90e684f21c4 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 00:55:24 +0200 Subject: [PATCH 12/15] Changing 'unread' state with 'queue' --- recipes/readitlater.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 9cda772354..26dbe5baa7 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -66,7 +66,7 @@ class Readitlater(BasicNewsRecipe): def parse_index(self): index = self.API_INDEX + 'v3/get?' + self.get_auth_params() - index += '&state=unread' + index += '&state=queue' index += '&count=' + str(self.max_articles_per_feed) index += '&sort=oldest' From 211ff892b235f1c6d56d88df61870293f902686c Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 01:17:10 +0200 Subject: [PATCH 13/15] Making code more PEP8 friendly --- recipes/readitlater.recipe | 54 ++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 26dbe5baa7..e1c622ee0d 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,7 +1,7 @@ ''' readitlaterlist.com ''' -__license__ = 'GPL v3' +__license__ = 'GPL v3' __copyright__ = ''' 2010, Darko Miletic 2011, Przemyslaw Kryger @@ -10,7 +10,7 @@ __copyright__ = ''' 2012, Alayn Gortazar ''' -from operator import itemgetter +from operator import itemgetter from contextlib import closing from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag @@ -19,6 +19,7 @@ import json import urllib import urllib2 + class Readitlater(BasicNewsRecipe): title = 'Read It Later' __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan, Alayn Gortazar' @@ -35,7 +36,7 @@ class Readitlater(BasicNewsRecipe): needs_subscription = True mark_as_read_after_dl = False enhanced_version = True - + KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' API_TEXT_INDEX = 'https://text.readitlaterlist.com/' API_INDEX = 'https://readitlaterlist.com/' @@ -59,16 +60,17 @@ class Readitlater(BasicNewsRecipe): def get_auth_params(self): auth_params = 'apikey=' + self.KEY if self.username is not None: - auth_params += '&username=' + self.username + auth_params += '&username=' + self.username if self.password is not None: - auth_params += '&password=' + self.password + auth_params += '&password=' + self.password return auth_params def parse_index(self): + # WARNING: Pre-alpha API, I just figured out this calls params. Surprisingly worked! 
:) index = self.API_INDEX + 'v3/get?' + self.get_auth_params() index += '&state=queue' - index += '&count=' + str(self.max_articles_per_feed) - index += '&sort=oldest' + index += '&count=' + str(self.max_articles_per_feed) + index += '&sort=oldest' open_func = getattr(self.browser, 'open_novisit', self.browser.open) with closing(open_func(index)) as f: @@ -77,10 +79,10 @@ class Readitlater(BasicNewsRecipe): raise RuntimeError('Could not fetch index!') json_obj = json.loads(results) - + if len(json_obj['list']) >= self.minimum_articles: for item in json_obj['list'].iteritems(): - # TODO: This URL should be modified by it's corresponding API call in a future. + # TODO: This URL should be modified by it's corresponding API call in a future. # Actually is not possible to get the Article View potential throught an API call (12/04/2012) if self.enhanced_version: dataurl = self.INDEX + 'a/x/getArticle.php?itemId=' + item[1]['item_id'] @@ -88,16 +90,16 @@ class Readitlater(BasicNewsRecipe): dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params() dataurl += '&url=' + item[1]['url'] self.articles.append({ - 'title':item[1]['resolved_title'], - 'date':item[1]['time_added'], - 'url':dataurl, - 'description':item[1]['item_id'], - 'sort_id':int(item[1]['sort_id']), - 'real_url':item[1]['given_url'] + 'title': item[1]['resolved_title'], + 'date': item[1]['time_added'], + 'url': dataurl, + 'description': item[1]['item_id'], + 'sort_id': int(item[1]['sort_id']), + 'real_url': item[1]['given_url'] }) else: raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") - + self.articles = sorted(self.articles, key=itemgetter('sort_id')) return [('Unread', self.articles)] @@ -108,7 +110,7 @@ class Readitlater(BasicNewsRecipe): self.images = {} for image in json_obj['article']['images']: self.images[image] = json_obj['article']['images'][image]['src'] - title = '
<h1>{title}</h1>
'.format(title=json_obj['article']['title']) + title = '
<h1>{title}</h1>
'.format(title=json_obj['article']['title']) link = '
<a href="{url}">Original: {url}</a>
'.format(url=json_obj['article']['resolvedUrl']) html = link + title + json_obj['article']['article'] else: @@ -121,37 +123,37 @@ class Readitlater(BasicNewsRecipe): for key, url in self.images.iteritems(): imgtag = Tag(soup, 'img') imgtag['src'] = url - div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) + div = soup.find('div', attrs={'id': 'RIL_IMG_' + key}) div.insert(0, imgtag) return soup def cleanup(self): # From a list of urls, create a human-readable JSON string # suitable for passing to the ReadItLater SEND::READ method. - if self.mark_as_read_after_dl: + if self.mark_as_read_after_dl: self.markAsRead(self.createMarkList(self.articles)) def createMarkList(self, articles): urls = [] for article in self.articles: urls.append(article['real_url']) - items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)] + items = ['"%d": {"url": "%s"}' % (n, u) for n, u in enumerate(urls)] s = '{\n %s\n}' % (',\n '.join(items),) return s def markAsRead(self, markList): url = self.API_INDEX + 'v2/send' values = { - 'username' : self.username, - 'password' : self.password, - 'apikey' : self.KEY, - 'read' : markList + 'username': self.username, + 'password': self.password, + 'apikey': self.KEY, + 'read': markList } data = urllib.urlencode(values) - + try: print 'Calling ReadItLater API...' - request = urllib2.Request(url,data) + request = urllib2.Request(url, data) response = urllib2.urlopen(request) the_page = response.read() print 'response =', response.code From 857ee6bc8192de5aac2eab03dd04ef669f102eb1 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 01:28:10 +0200 Subject: [PATCH 14/15] Making berria recipe more PEP8 friendly --- recipes/berria.recipe | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 6d2b5e05ec..406a27e36c 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -6,12 +6,13 @@ www.berria.info from calibre.web.feeds.news import BasicNewsRecipe + class Berria(BasicNewsRecipe): title = 'Berria' __author__ = 'Alayn Gortazar' description = 'Euskal Herriko euskarazko egunkaria' publisher = 'Berria' - category = 'news, politics, Basque Country' + category = 'news, politics, sports, Basque Country' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True @@ -21,23 +22,23 @@ class Berria(BasicNewsRecipe): masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png' keep_only_tags = [ - dict(id='goiburua') - ,dict(name='div', attrs={'class':['ber_ikus']}) - ,dict(name='section', attrs={'class':'ber_ikus' }) + dict(id='goiburua'), + dict(name='div', attrs={'class':['ber_ikus']}), + dict(name='section', attrs={'class':'ber_ikus'}) ] remove_tags = [ - dict(name='a', attrs={'class':'iruzkinak'}) - ,dict(name='div', attrs={'class':'laguntzaileak'}) + dict(name='a', attrs={'class':'iruzkinak'}), + dict(name='div', attrs={'class':'laguntzaileak'}) ] extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}' - + feeds = [ - (u'Edizioa jarraia' , u'http://berria.info/rss/ediziojarraia.xml') - ,(u'Iritzia' , u'http://berria.info/rss/iritzia.xml' ) - ,(u'Euskal Herria' , u'http://berria.info/rss/euskalherria.xml' ) - ,(u'Ekonomia' , u'http://berria.info/rss/ekonomia.xml' ) - ,(u'Mundua' , u'http://berria.info/rss/mundua.xml' ) - ,(u'Kirola' , 
u'http://berria.info/rss/kirola.xml' ) - ,(u'Plaza' , u'http://berria.info/rss/plaza.xml' ) + (u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'), + (u'Iritzia', u'http://berria.info/rss/iritzia.xml'), + (u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'), + (u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'), + (u'Mundua', u'http://berria.info/rss/mundua.xml'), + (u'Kirola', u'http://berria.info/rss/kirola.xml'), + (u'Plaza', u'http://berria.info/rss/plaza.xml') ] From 898cd84b726cc29832f7e541e328dc57dfaf3bf3 Mon Sep 17 00:00:00 2001 From: Alayn Gortazar Date: Mon, 23 Apr 2012 10:37:24 +0200 Subject: [PATCH 15/15] changing a dot --- recipes/berria.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/berria.recipe b/recipes/berria.recipe index 406a27e36c..06f8344988 100644 --- a/recipes/berria.recipe +++ b/recipes/berria.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2012, Alayn Gortazar ' +__copyright__ = '2012, Alayn Gortazar ' ''' www.berria.info '''