Waco Tribune Herald by rty

2025-08-30 23:00:21 -04:00 · 2010-07-14 09:01:01 -06:00 · 2010-07-14 09:01:01 -06:00 · 8a5a5e2ad1
commit 8a5a5e2ad1
parent 9a4b661ac6
3 changed files with 104 additions and 70 deletions
--- a/resources/recipes/greader.recipe
+++ b/resources/recipes/greader.recipe
@@ -1,35 +1,35 @@
-import urllib, re, mechanize
+import urllib, re, mechanize
-from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre import __appname__
+from calibre import __appname__
-
+
-class GoogleReader(BasicNewsRecipe):
+class GoogleReader(BasicNewsRecipe):
-    title   = 'Google Reader'
+    title   = 'Google Reader'
-    description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.'
+    description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.'
-    needs_subscription = True
+    needs_subscription = True
-    __author__ = 'davec, rollercoaster, Starson17'
+    __author__ = 'davec, rollercoaster, Starson17'
-    base_url = 'http://www.google.com/reader/atom/'
+    base_url = 'http://www.google.com/reader/atom/'
-    oldest_article = 365
+    oldest_article = 365
-    max_articles_per_feed = 250
+    max_articles_per_feed = 250
-    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
+    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
-    use_embedded_content = True
+    use_embedded_content = True
-
+
-    def get_browser(self):
+    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
+        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
+        if self.username is not None and self.password is not None:
-            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
+            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
-                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
+                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
-            response = br.open('https://www.google.com/accounts/ClientLogin', request)
+            response = br.open('https://www.google.com/accounts/ClientLogin', request)
-            auth = re.search('Auth=(\S*)', response.read()).group(1)
+            auth = re.search('Auth=(\S*)', response.read()).group(1)
-            cookies = mechanize.CookieJar()
+            cookies = mechanize.CookieJar()
-            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
+            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
-            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
+            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
-        return br
+        return br
-
+
-    def get_feeds(self):
+    def get_feeds(self):
-        feeds = []
+        feeds = []
-        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
+        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
-        for id in soup.findAll(True, attrs={'name':['id']}):
+        for id in soup.findAll(True, attrs={'name':['id']}):
-            url = id.contents[0]
+            url = id.contents[0]
-            feeds.append((re.search('/([^/]*)$', url).group(1),
+            feeds.append((re.search('/([^/]*)$', url).group(1),
-                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
+                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
-        return feeds
+        return feeds
--- a/resources/recipes/greader_uber.recipe
+++ b/resources/recipes/greader_uber.recipe
@@ -1,35 +1,35 @@
-import urllib, re, mechanize
+import urllib, re, mechanize
-from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre import __appname__
+from calibre import __appname__
-
+
-class GoogleReaderUber(BasicNewsRecipe):
+class GoogleReaderUber(BasicNewsRecipe):
-    title   = 'Google Reader uber'
+    title   = 'Google Reader uber'
-    description = 'Fetches all feeds from your Google Reader account including the uncategorized items.'
+    description = 'Fetches all feeds from your Google Reader account including the uncategorized items.'
-    needs_subscription = True
+    needs_subscription = True
-    __author__ = 'davec, rollercoaster, Starson17'
+    __author__ = 'davec, rollercoaster, Starson17'
-    base_url = 'http://www.google.com/reader/atom/'
+    base_url = 'http://www.google.com/reader/atom/'
-    oldest_article = 365
+    oldest_article = 365
-    max_articles_per_feed = 250
+    max_articles_per_feed = 250
-    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
+    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
-    use_embedded_content = True
+    use_embedded_content = True
-
+
-    def get_browser(self):
+    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
+        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
+        if self.username is not None and self.password is not None:
-            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
+            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
-                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
+                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
-            response = br.open('https://www.google.com/accounts/ClientLogin', request)
+            response = br.open('https://www.google.com/accounts/ClientLogin', request)
-            auth = re.search('Auth=(\S*)', response.read()).group(1)
+            auth = re.search('Auth=(\S*)', response.read()).group(1)
-            cookies = mechanize.CookieJar()
+            cookies = mechanize.CookieJar()
-            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
+            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
-            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
+            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
-        return br
+        return br
-
+
-    def get_feeds(self):
+    def get_feeds(self):
-        feeds = []
+        feeds = []
-        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
+        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
-        for id in soup.findAll(True, attrs={'name':['id']}):
+        for id in soup.findAll(True, attrs={'name':['id']}):
-            url = id.contents[0].replace('broadcast','reading-list')
+            url = id.contents[0].replace('broadcast','reading-list')
-            feeds.append((re.search('/([^/]*)$', url).group(1),
+            feeds.append((re.search('/([^/]*)$', url).group(1),
-                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
+                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
-        return feeds
+        return feeds
--- a/resources/recipes/waco_tribune.recipe
+++ b/resources/recipes/waco_tribune.recipe
@@ -0,0 +1,34 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1278773519(BasicNewsRecipe):
     """Calibre news recipe for the Waco Tribune Herald (News, Sports, AccessWaco and Opinions RSS feeds)."""
     # ---- Publication metadata ----
     title = u'Waco Tribune Herald'
     __author__ = 'rty'
     # Spelling fixed: this attribute was previously written ``pubisher``,
     # so the value was never reachable under the conventional name.
     publisher = 'A Robinson Media Company'
     description = 'Waco, Texas, Newspaper'
     category = 'News, Texas, Waco'
     language = 'en'
     encoding = 'utf-8'
     masthead_url = 'http://media.wacotrib.com/designimages/wacotrib_logo.jpg'
     # ---- Feed selection ----
     # Per the calibre recipe API, ``oldest_article`` is measured in days.
     oldest_article = 7
     max_articles_per_feed = 100
     # (section title, feed URL) pairs.
     feeds = [
         (u'News', u'http://www.wacotrib.com/news/index.rss2'),
         (u'Sports', u'http://www.wacotrib.com/sports/index.rss2'),
         (u'AccessWaco', u'http://www.wacotrib.com/accesswaco/index.rss2'),
         (u'Opinions', u'http://www.wacotrib.com/opinion/index.rss2'),
     ]
     # ---- Download behaviour and page clean-up ----
     # False: do not rely on the content embedded in the feed entries.
     use_embedded_content = False
     remove_javascript = True
     no_stylesheets = True
     conversion_options = {'linearize_tables': True}
     # Restrict each article page to this container div
     # (presumably the article body; verify against the live site markup).
     keep_only_tags = [
         dict(name='div', attrs={'class': 'twoColumn left'}),
     ]
     # Drop this div from inside the kept container.
     remove_tags = [
         dict(name='div', attrs={'class': 'right blueLinks'}),
     ]
     # Discard everything that follows this div.
     remove_tags_after = [
         dict(name='div', attrs={'class': 'dottedRule'}),
     ]