diff --git a/resources/recipes/greader.recipe b/resources/recipes/greader.recipe index 75c273b162..2c9d5aa015 100644 --- a/resources/recipes/greader.recipe +++ b/resources/recipes/greader.recipe @@ -1,35 +1,35 @@ -import urllib, re, mechanize -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre import __appname__ - -class GoogleReader(BasicNewsRecipe): - title = 'Google Reader' - description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.' - needs_subscription = True - __author__ = 'davec, rollercoaster, Starson17' - base_url = 'http://www.google.com/reader/atom/' - oldest_article = 365 - max_articles_per_feed = 250 - get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed - use_embedded_content = True - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None and self.password is not None: - request = urllib.urlencode([('Email', self.username), ('Passwd', self.password), - ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)]) - response = br.open('https://www.google.com/accounts/ClientLogin', request) - auth = re.search('Auth=(\S*)', response.read()).group(1) - cookies = mechanize.CookieJar() - br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) - br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)] - return br - - def get_feeds(self): - feeds = [] - soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list') - for id in soup.findAll(True, attrs={'name':['id']}): - url = id.contents[0] - feeds.append((re.search('/([^/]*)$', url).group(1), - self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options)) - return feeds +import urllib, re, mechanize +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre import __appname__ + +class GoogleReader(BasicNewsRecipe): + title = 'Google Reader' + 
description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.' + needs_subscription = True + __author__ = 'davec, rollercoaster, Starson17' + base_url = 'http://www.google.com/reader/atom/' + oldest_article = 365 + max_articles_per_feed = 250 + get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed + use_embedded_content = True + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + if self.username is not None and self.password is not None: + request = urllib.urlencode([('Email', self.username), ('Passwd', self.password), + ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)]) + response = br.open('https://www.google.com/accounts/ClientLogin', request) + auth = re.search('Auth=(\S*)', response.read()).group(1) + cookies = mechanize.CookieJar() + br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) + br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)] + return br + + def get_feeds(self): + feeds = [] + soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list') + for id in soup.findAll(True, attrs={'name':['id']}): + url = id.contents[0] + feeds.append((re.search('/([^/]*)$', url).group(1), + self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options)) + return feeds diff --git a/resources/recipes/greader_uber.recipe b/resources/recipes/greader_uber.recipe index c98762fe28..5e02cdef5d 100644 --- a/resources/recipes/greader_uber.recipe +++ b/resources/recipes/greader_uber.recipe @@ -1,35 +1,35 @@ -import urllib, re, mechanize -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre import __appname__ - -class GoogleReaderUber(BasicNewsRecipe): - title = 'Google Reader uber' - description = 'Fetches all feeds from your Google Reader account including the uncategorized items.' 
- needs_subscription = True - __author__ = 'davec, rollercoaster, Starson17' - base_url = 'http://www.google.com/reader/atom/' - oldest_article = 365 - max_articles_per_feed = 250 - get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed - use_embedded_content = True - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None and self.password is not None: - request = urllib.urlencode([('Email', self.username), ('Passwd', self.password), - ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)]) - response = br.open('https://www.google.com/accounts/ClientLogin', request) - auth = re.search('Auth=(\S*)', response.read()).group(1) - cookies = mechanize.CookieJar() - br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) - br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)] - return br - - def get_feeds(self): - feeds = [] - soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list') - for id in soup.findAll(True, attrs={'name':['id']}): - url = id.contents[0].replace('broadcast','reading-list') - feeds.append((re.search('/([^/]*)$', url).group(1), - self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options)) - return feeds +import urllib, re, mechanize +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre import __appname__ + +class GoogleReaderUber(BasicNewsRecipe): + title = 'Google Reader uber' + description = 'Fetches all feeds from your Google Reader account including the uncategorized items.' 
+ needs_subscription = True + __author__ = 'davec, rollercoaster, Starson17' + base_url = 'http://www.google.com/reader/atom/' + oldest_article = 365 + max_articles_per_feed = 250 + get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed + use_embedded_content = True + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + if self.username is not None and self.password is not None: + request = urllib.urlencode([('Email', self.username), ('Passwd', self.password), + ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)]) + response = br.open('https://www.google.com/accounts/ClientLogin', request) + auth = re.search('Auth=(\S*)', response.read()).group(1) + cookies = mechanize.CookieJar() + br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) + br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)] + return br + + def get_feeds(self): + feeds = [] + soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list') + for id in soup.findAll(True, attrs={'name':['id']}): + url = id.contents[0].replace('broadcast','reading-list') + feeds.append((re.search('/([^/]*)$', url).group(1), + self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options)) + return feeds diff --git a/resources/recipes/waco_tribune.recipe b/resources/recipes/waco_tribune.recipe new file mode 100644 index 0000000000..18eb61fb26 --- /dev/null +++ b/resources/recipes/waco_tribune.recipe @@ -0,0 +1,34 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1278773519(BasicNewsRecipe): + title = u'Waco Tribune Herald' + __author__ = 'rty' + publisher = 'A Robinson Media Company' + description = 'Waco, Texas, Newspaper' + category = 'News, Texas, Waco' + oldest_article = 7 + max_articles_per_feed = 100 + + feeds = [ + (u'News', u'http://www.wacotrib.com/news/index.rss2'), + (u'Sports', u'http://www.wacotrib.com/sports/index.rss2'), + (u'AccessWaco', 
u'http://www.wacotrib.com/accesswaco/index.rss2'), + (u'Opinions', u'http://www.wacotrib.com/opinion/index.rss2') + ] + + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + language = 'en' + encoding = 'utf-8' + conversion_options = {'linearize_tables':True} + masthead_url = 'http://media.wacotrib.com/designimages/wacotrib_logo.jpg' + keep_only_tags = [ + dict(name='div', attrs={'class':'twoColumn left'}), + ] + remove_tags = [ + dict(name='div', attrs={'class':'right blueLinks'}), + ] + remove_tags_after = [ + dict(name='div', attrs={'class':'dottedRule'}), + ]