Bill O'Reilly and Sean Hannity by Rob Lammert

2025-09-11 22:59:10 -04:00 · 2010-04-25 16:00:30 -06:00 · 2010-04-25 16:00:30 -06:00 · f8a3a02a6e
commit f8a3a02a6e
parent 1236fd3cc0
3 changed files with 122 additions and 0 deletions
--- a/resources/recipes/billorielly.recipe
+++ b/resources/recipes/billorielly.recipe
@ -0,0 +1,85 @@
 #!/usr/bin/env  python
 # Test this recipe (Windows) with: ebook-convert.exe c:\billorielly.recipe c:\test -vv
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class BillOReilly(BasicNewsRecipe):
    """Recipe collecting O'Reilly Factor show pages and newspaper columns.

    The article lists are scraped from two billoreilly.com index pages in
    parse_index() instead of being read from RSS feeds.
    """
    cover_url             = 'http://images.billoreilly.com/images/headers/borbanner.jpg'
    title                 = u"Bill O'Reilly"
    __author__            = 'Rob Lammert - rob.lammert[at]gmail.com'
    description           = u"Articles from Bill O'Reilly's website and his Fox New's website"
    oldest_article        = 7.0
    max_articles_per_feed = 100
    recursions            = 0
    encoding              = 'utf8'
    no_stylesheets        = True
    remove_javascript     = True
    #use_embedded_content  = False
    # RSS feeds kept for reference only; parse_index() below is used instead.
    #feeds                 = [
    #  ('Talking Points Memo', u'http://www.foxnews.com/xmlfeed/column/0,5184,19,00.rss'),
    #  ('No Spin News', u'http://www.billoreilly.com/blog?rss=true&size=50&useBlurbs=true&categoryID=7')
    #]
    # Prefix added to site-relative links found on the index pages.
    _bo_site              = 'http://www.billoreilly.com'
    # Upper bound on articles taken from each index page (the original
    # "counter <= 5" test admitted six entries).
    _bo_max_articles      = 6
    def parse_index(self):
      """Build the feed list for this recipe.

      Returns a list of (section title, article list) tuples. A section is
      omitted when its index page yielded no articles.
      """
      sections = (
        ("O'Reilly Factor",
         self.bo_parse_shows('http://www.billoreilly.com/show?action=tvShowArchive')),
        ("Newspaper Column",
         self.bo_parse_columns('http://www.billoreilly.com/columns')),
      )
      return [(name, articles) for name, articles in sections if articles]
    def bo_parse_shows(self,url):
      """Return article dicts for the show links (class 'showLinks') found at url."""
      return self._bo_collect_articles(
        url, 'class', 'showLinks',
        '&dest=/pg/jsp/community/tvshowprint.jsp')
    def bo_parse_columns(self,url):
      """Return article dicts for the column links found at url.

      Only elements with id 'bold' whose class is 'defaultLinks' are used.
      """
      # The original suffix ended in a stray double quote
      # ('&printerFriendly=true"'), which corrupted the query string.
      return self._bo_collect_articles(
        url, 'id', 'bold',
        '&printerFriendly=true',
        required_class='defaultLinks')
    def _bo_collect_articles(self, index_url, attr_name, attr_value, print_suffix, required_class=None):
      """Scrape one index page and return its article dicts.

      index_url      -- page to download and parse.
      attr_name      -- attribute used to recognise link elements ('class' or 'id').
      attr_value     -- value that attribute must have.
      print_suffix   -- query-string fragment appended to site-relative links.
      required_class -- when given, elements whose 'class' differs are skipped.

      Returns a list of dicts with 'title', 'url', 'description' and 'date'
      keys; the list is empty when the page has no matching element.
      """
      soup = self.index_to_soup(index_url)
      first = soup.find(attrs={attr_name: attr_value})
      if first is None:
        # Page layout changed or the download was empty: report it instead
        # of failing with an AttributeError on None.
        self.log('\t\tNo article links found at', index_url)
        return []
      found = []
      # NOTE(review): findAllNext() only yields matches located after the
      # first one, so the first matching element itself is never examined.
      # Kept as in the original; confirm this is intended.
      for lnk in first.findAllNext(attrs={attr_name: [attr_value]}):
        if len(found) >= self._bo_max_articles:
          break
        if required_class is not None and lnk.get('class', False) != required_class:
          continue
        title = self.tag_to_string(lnk)
        href = lnk.get('href', False)
        if not href or not title:
          continue
        if href.startswith('/'):
          href = self._bo_site + href + print_suffix
        self.log('\t\tFound article:', title)
        self.log('\t\t\t', href)
        found.append({'title': title, 'url': href, 'description':'', 'date':''})
      return found
--- a/resources/recipes/seanhannity.recipe
+++ b/resources/recipes/seanhannity.recipe
@ -0,0 +1,35 @@
 #!/usr/bin/env  python
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class SeanHannity(BasicNewsRecipe):
    """Recipe for the Sean Hannity Show, fed by a single FeedBurner feed."""
    # Identification.
    title                 = u"Sean Hannity Show"
    description           = u"Articles from Sean Hannity's website, www.hannity.com"
    __author__            = 'Rob Lammert - rob.lammert[at]gmail.com'
    cover_url             = 'http://www.hannity.com/images/misc_logo.gif'
    # Source feed.
    feeds = [
      ('Content Feed', u'http://feeds.feedburner.com/TheSeanHannityShow-AllContent?format=xml'),
    ]
    # Download limits and text handling.
    oldest_article        = 7.0
    max_articles_per_feed = 100
    recursions            = 0
    encoding              = 'utf8'
    # Clean-up options.
    no_stylesheets        = True
    remove_javascript     = True
    #use_embedded_content  = False
    # Elements listed for removal: header/footer/ad containers, the right
    # column, the audio box, the navigation anchors and every iframe.
    remove_tags = [
      {'name': 'div',
       'attrs': {'id': ['header', 'navsprite', 'topminibarad', 'headline_bar', 'shadow', 'footer']}},
      {'name': 'div', 'attrs': {'class': 'rightcolumn'}},
      {'name': 'table', 'attrs': {'id': 'audiobox'}},
      {'name': 'a', 'attrs': {'title': ['Home', 'Shows', 'Guests', 'Photos']}},
      {'name': 'iframe'},
    ]
    # Disabled print-version rewrite, kept for reference:
    #def print_version(self, url):
    #  parts=url.split('/')
    #  return url.replace(url, 'http://www.hannity.com/show/' + parts[4] + '/' + parts[5] + '/' + parts[6] + '?mode=print')
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@ -19,6 +19,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
        self.setupUi(self)
        self.db = db
        self.ids = [db.id(r) for r in rows]
        self.groupBox.setTitle(_('Editing meta information for %d books') %
                len(rows))
        self.write_series = False
        self.changed = False
        QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync)