Bill O'Reilly and Sean Hannity by Rob Lammert

2025-07-09 03:04:10 -04:00 · 2010-04-25 16:00:30 -06:00 · 2010-04-25 16:00:30 -06:00 · f8a3a02a6e
commit f8a3a02a6e
parent 1236fd3cc0
3 changed files with 122 additions and 0 deletions
--- a/resources/recipes/billorielly.recipe
+++ b/resources/recipes/billorielly.recipe
@ -0,0 +1,85 @@
+#!/usr/bin/env  python
+
+# ebook-convert.exe c:\billorielly.recipe c:\test -vv
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BillOReilly(BasicNewsRecipe):
+    cover_url             = 'http://images.billoreilly.com/images/headers/borbanner.jpg'
+    title          		    = u"Bill O'Reilly"
+    __author__ 	          = 'Rob Lammert - rob.lammert[at]gmail.com'
+    description		        = u"Articles from Bill O'Reilly's website and his Fox New's website"
+    oldest_article        = 7.0
+    max_articles_per_feed = 100
+    recursions            = 0
+    encoding              = 'utf8'
+    no_stylesheets        = True
+    remove_javascript     = True
+    #use_embedded_content  = False
+
+
+ #   feeds                 = [
+ #     ('Talking Points Memo', u'http://www.foxnews.com/xmlfeed/column/0,5184,19,00.rss'),
+ #     ('No Spin News', u'http://www.billoreilly.com/blog?rss=true&size=50&useBlurbs=true&categoryID=7')
+ #   ]
+
+    def parse_index(self):
+      feeds = []
+
+      articles_shows = self.bo_parse_shows('http://www.billoreilly.com/show?action=tvShowArchive')
+      articles_columns = self.bo_parse_columns('http://www.billoreilly.com/columns')
+
+      if articles_shows:
+        feeds.append(("O'Reilly Factor", articles_shows))
+
+      if articles_columns:
+        feeds.append(("Newspaper Column", articles_columns))
+
+      return feeds
+
+    def bo_parse_shows(self,url):
+      soup = self.index_to_soup(url)
+      links = soup.find(attrs={'class': 'showLinks'})
+
+      current_articles = []
+      counter = 0
+      for lnk in links.findAllNext(attrs={'class': ['showLinks']}):
+        if counter <= 5:
+          title = self.tag_to_string(lnk)
+          url = lnk.get('href', False)
+
+          if not url or not title:
+            continue
+
+          if url.startswith('/'):
+            url = 'http://www.billoreilly.com'+url+'&dest=/pg/jsp/community/tvshowprint.jsp'
+
+          self.log('\t\tFound article:', title)
+          self.log('\t\t\t', url)
+          current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
+          counter += 1
+      return current_articles
+
+    def bo_parse_columns(self,url):
+      soup = self.index_to_soup(url)
+      links = soup.find(attrs={'id': 'bold'})
+
+      current_articles = []
+      counter = 0
+      for lnk in links.findAllNext(attrs={'id': ['bold']}):
+        test = lnk.get('class', False)
+        if counter <= 5 and test == 'defaultLinks':
+          title = self.tag_to_string(lnk)
+          url = lnk.get('href', False)
+
+          if not url or not title:
+            continue
+
+          if url.startswith('/'):
+            url = 'http://www.billoreilly.com'+url+'&printerFriendly=true"'
+
+          self.log('\t\tFound article:', title)
+          self.log('\t\t\t', url)
+          current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
+          counter += 1
+      return current_articles
--- a/resources/recipes/seanhannity.recipe
+++ b/resources/recipes/seanhannity.recipe
@ -0,0 +1,35 @@
+#!/usr/bin/env  python
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class SeanHannity(BasicNewsRecipe):
+    cover_url             = 'http://www.hannity.com/images/misc_logo.gif'
+    title          		    = u"Sean Hannity Show"
+    __author__ 	          = 'Rob Lammert - rob.lammert[at]gmail.com'
+    description		        = u"Articles from Sean Hannity's website, www.hannity.com"
+    oldest_article        = 7.0
+    max_articles_per_feed = 100
+    recursions            = 0
+    encoding              = 'utf8'
+    no_stylesheets        = True
+    remove_javascript     = True
+    #use_embedded_content  = False
+
+    remove_tags    = [
+      dict(name='div', attrs={'id':['header','navsprite','topminibarad','headline_bar','shadow','footer']}),
+      dict(name='div', attrs={'class':'rightcolumn'}),
+      dict(name='table', attrs={'id':'audiobox'}),
+      dict(name='a', attrs={'title':['Home','Shows','Guests','Photos']}),
+      dict(name='iframe')
+    ]
+
+    feeds          = [
+	     ('Content Feed', u'http://feeds.feedburner.com/TheSeanHannityShow-AllContent?format=xml')
+    ]
+
+
+
+  #def print_version(self, url):
+    #parts=url.split('/')
+    #return url.replace(url, 'http://www.hannity.com/show/' + parts[4] + '/' + parts[5] + '/' + parts[6] + '?mode=print')
+
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@ -19,6 +19,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
        self.setupUi(self)
        self.db = db
        self.ids = [db.id(r) for r in rows]
+        self.groupBox.setTitle(_('Editing meta information for %d books') %
+                len(rows))
        self.write_series = False
        self.changed = False
        QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync)