Waco Tribune Herald by rty

2026-05-31 02:55:19 -04:00 · 2010-07-14 09:01:01 -06:00
parent 9a4b661ac6
commit 8a5a5e2ad1
3 changed files with 104 additions and 70 deletions
@@ -1,35 +1,35 @@
-import urllib, re, mechanize
-from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre import __appname__
-
-class GoogleReader(BasicNewsRecipe):
-    title   = 'Google Reader'
-    description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.'
-    needs_subscription = True
-    __author__ = 'davec, rollercoaster, Starson17'
-    base_url = 'http://www.google.com/reader/atom/'
-    oldest_article = 365
-    max_articles_per_feed = 250
-    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
-    use_embedded_content = True
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
-                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
-            response = br.open('https://www.google.com/accounts/ClientLogin', request)
-            auth = re.search('Auth=(\S*)', response.read()).group(1)
-            cookies = mechanize.CookieJar()
-            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
-            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
-        return br
-
-    def get_feeds(self):
-        feeds = []
-        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
-        for id in soup.findAll(True, attrs={'name':['id']}):
-            url = id.contents[0]
-            feeds.append((re.search('/([^/]*)$', url).group(1),
-                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
-        return feeds
+import urllib, re, mechanize
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre import __appname__
+
+class GoogleReader(BasicNewsRecipe):
+    title   = 'Google Reader'
+    description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.'
+    needs_subscription = True
+    __author__ = 'davec, rollercoaster, Starson17'
+    base_url = 'http://www.google.com/reader/atom/'
+    oldest_article = 365
+    max_articles_per_feed = 250
+    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
+    use_embedded_content = True
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
+                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
+            response = br.open('https://www.google.com/accounts/ClientLogin', request)
+            auth = re.search('Auth=(\S*)', response.read()).group(1)
+            cookies = mechanize.CookieJar()
+            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
+            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
+        return br
+
+    def get_feeds(self):
+        feeds = []
+        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
+        for id in soup.findAll(True, attrs={'name':['id']}):
+            url = id.contents[0]
+            feeds.append((re.search('/([^/]*)$', url).group(1),
+                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
+        return feeds
@@ -1,35 +1,35 @@
-import urllib, re, mechanize
-from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre import __appname__
-
-class GoogleReaderUber(BasicNewsRecipe):
-    title   = 'Google Reader uber'
-    description = 'Fetches all feeds from your Google Reader account including the uncategorized items.'
-    needs_subscription = True
-    __author__ = 'davec, rollercoaster, Starson17'
-    base_url = 'http://www.google.com/reader/atom/'
-    oldest_article = 365
-    max_articles_per_feed = 250
-    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
-    use_embedded_content = True
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
-                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
-            response = br.open('https://www.google.com/accounts/ClientLogin', request)
-            auth = re.search('Auth=(\S*)', response.read()).group(1)
-            cookies = mechanize.CookieJar()
-            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
-            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
-        return br
-
-    def get_feeds(self):
-        feeds = []
-        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
-        for id in soup.findAll(True, attrs={'name':['id']}):
-            url = id.contents[0].replace('broadcast','reading-list')
-            feeds.append((re.search('/([^/]*)$', url).group(1),
-                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
-        return feeds
+import urllib, re, mechanize
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre import __appname__
+
+class GoogleReaderUber(BasicNewsRecipe):
+    title   = 'Google Reader uber'
+    description = 'Fetches all feeds from your Google Reader account including the uncategorized items.'
+    needs_subscription = True
+    __author__ = 'davec, rollercoaster, Starson17'
+    base_url = 'http://www.google.com/reader/atom/'
+    oldest_article = 365
+    max_articles_per_feed = 250
+    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
+    use_embedded_content = True
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
+                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
+            response = br.open('https://www.google.com/accounts/ClientLogin', request)
+            auth = re.search('Auth=(\S*)', response.read()).group(1)
+            cookies = mechanize.CookieJar()
+            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
+            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
+        return br
+
+    def get_feeds(self):
+        feeds = []
+        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
+        for id in soup.findAll(True, attrs={'name':['id']}):
+            url = id.contents[0].replace('broadcast','reading-list')
+            feeds.append((re.search('/([^/]*)$', url).group(1),
+                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
+        return feeds
@@ -0,0 +1,34 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1278773519(BasicNewsRecipe):
+    title          = u'Waco Tribune Herald'
+    __author__ = 'rty'
+    pubisher  = 'A Robinson Media Company'
+    description           = 'Waco, Texas, Newspaper'
+    category              = 'News, Texas, Waco'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    feeds          = [
+	(u'News', u'http://www.wacotrib.com/news/index.rss2'),
+	(u'Sports', u'http://www.wacotrib.com/sports/index.rss2'),
+	(u'AccessWaco', u'http://www.wacotrib.com/accesswaco/index.rss2'),
+	(u'Opinions', u'http://www.wacotrib.com/opinion/index.rss2')
+	]
+
+    remove_javascript = True
+    use_embedded_content   = False
+    no_stylesheets = True
+    language = 'en'
+    encoding               = 'utf-8'
+    conversion_options = {'linearize_tables':True}
+    masthead_url = 'http://media.wacotrib.com/designimages/wacotrib_logo.jpg'
+    keep_only_tags = [
+                              dict(name='div', attrs={'class':'twoColumn left'}),
+                               ]
+    remove_tags = [
+                    dict(name='div', attrs={'class':'right blueLinks'}),
+                         ]
+    remove_tags_after = [
+                  dict(name='div', attrs={'class':'dottedRule'}),
+                         ]