From f8a3a02a6ed932502b42e3e7e7eb26ba6085eab0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 25 Apr 2010 16:00:30 -0600
Subject: [PATCH] Bill O'Reilly and Sean Hannity by Rob Lammert

---
Review note: the two recipe sources below were amended during review, so the
blob ids on their index lines still refer to the originally submitted files.

 resources/recipes/billorielly.recipe      | 110 ++++++++++++++++++++++
 resources/recipes/seanhannity.recipe      |  39 ++++++++
 src/calibre/gui2/dialogs/metadata_bulk.py |   2 +
 3 files changed, 151 insertions(+)
 create mode 100644 resources/recipes/billorielly.recipe
 create mode 100644 resources/recipes/seanhannity.recipe

diff --git a/resources/recipes/billorielly.recipe b/resources/recipes/billorielly.recipe
new file mode 100644
index 0000000000..e5eb700248
--- /dev/null
+++ b/resources/recipes/billorielly.recipe
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+# ebook-convert.exe c:\billorielly.recipe c:\test -vv
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BillOReilly(BasicNewsRecipe):
+    '''
+    Index-scraping recipe for billoreilly.com. Two feeds are built, one from
+    the TV show archive page and one from the newspaper columns page.
+    '''
+
+    cover_url = 'http://images.billoreilly.com/images/headers/borbanner.jpg'
+    title = u"Bill O'Reilly"
+    __author__ = 'Rob Lammert - rob.lammert[at]gmail.com'
+    description = u"Articles from Bill O'Reilly's website and his Fox New's website"
+    oldest_article = 7.0
+    max_articles_per_feed = 100
+    recursions = 0
+    encoding = 'utf8'
+    no_stylesheets = True
+    remove_javascript = True
+    #use_embedded_content = False
+
+    # Prefix that turns a site-relative href into an absolute URL
+    BASE_URL = 'http://www.billoreilly.com'
+    # At most this many articles are taken from each index page
+    MAX_INDEX_ARTICLES = 6
+
+    # feeds = [
+    #     ('Talking Points Memo', u'http://www.foxnews.com/xmlfeed/column/0,5184,19,00.rss'),
+    #     ('No Spin News', u'http://www.billoreilly.com/blog?rss=true&size=50&useBlurbs=true&categoryID=7')
+    # ]
+
+    def parse_index(self):
+        '''
+        Return a list of (feed title, list of article dicts) tuples. A feed
+        for which no articles were found is left out.
+        '''
+        feeds = []
+
+        articles_shows = self.bo_parse_shows('http://www.billoreilly.com/show?action=tvShowArchive')
+        articles_columns = self.bo_parse_columns('http://www.billoreilly.com/columns')
+
+        if articles_shows:
+            feeds.append(("O'Reilly Factor", articles_shows))
+
+        if articles_columns:
+            feeds.append(("Newspaper Column", articles_columns))
+
+        return feeds
+
+    def bo_parse_shows(self, url):
+        '''
+        Return article dicts for the links with class "showLinks" on the page
+        at url. Site-relative links get the print view suffix appended.
+        '''
+        return self.bo_collect_articles(url, 'class', 'showLinks',
+                '&dest=/pg/jsp/community/tvshowprint.jsp')
+
+    def bo_parse_columns(self, url):
+        '''
+        Return article dicts for the links with id "bold" and class
+        "defaultLinks" on the page at url. Site-relative links get the
+        printer friendly suffix appended.
+        '''
+        # The suffix must not end in a quote character, it would become part
+        # of every column URL
+        return self.bo_collect_articles(url, 'id', 'bold',
+                '&printerFriendly=true', required_class='defaultLinks')
+
+    def bo_collect_articles(self, url, attr, value, suffix, required_class=None):
+        '''
+        Shared scraper behind bo_parse_shows() and bo_parse_columns().
+
+        Fetches url, locates the first element whose attribute attr equals
+        value and walks the matching elements that follow it, collecting at
+        most MAX_INDEX_ARTICLES article dicts. When required_class is given,
+        elements with a different class attribute are ignored. suffix is
+        appended to hrefs that start with '/', after BASE_URL is prepended.
+        '''
+        soup = self.index_to_soup(url)
+        first = soup.find(attrs={attr: value})
+        if first is None:
+            # Nothing matched; report it instead of failing with an
+            # AttributeError on None
+            self.log('\t\tNo element with %s=%r found at %s' % (attr, value, url))
+            return []
+
+        current_articles = []
+        # findAllNext() only returns elements after `first`, so `first` itself
+        # is never turned into an article
+        for lnk in first.findAllNext(attrs={attr: [value]}):
+            if len(current_articles) >= self.MAX_INDEX_ARTICLES:
+                break
+            if required_class is not None and lnk.get('class', False) != required_class:
+                continue
+
+            title = self.tag_to_string(lnk)
+            href = lnk.get('href', False)
+            if not href or not title:
+                continue
+
+            if href.startswith('/'):
+                href = self.BASE_URL + href + suffix
+
+            self.log('\t\tFound article:', title)
+            self.log('\t\t\t', href)
+            current_articles.append({'title': title, 'url': href, 'description':'', 'date':''})
+        return current_articles
diff --git a/resources/recipes/seanhannity.recipe b/resources/recipes/seanhannity.recipe
new file mode 100644
index 0000000000..665b84de1f
--- /dev/null
+++ b/resources/recipes/seanhannity.recipe
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class SeanHannity(BasicNewsRecipe):
+    '''
+    Feed-based recipe for www.hannity.com. The article list comes from the
+    single FeedBurner feed declared in feeds.
+    '''
+
+    cover_url = 'http://www.hannity.com/images/misc_logo.gif'
+    title = u"Sean Hannity Show"
+    __author__ = 'Rob Lammert - rob.lammert[at]gmail.com'
+    description = u"Articles from Sean Hannity's website, www.hannity.com"
+    oldest_article = 7.0
+    max_articles_per_feed = 100
+    recursions = 0
+    encoding = 'utf8'
+    no_stylesheets = True
+    remove_javascript = True
+    #use_embedded_content = False
+
+    # Tag specifications handed to BasicNewsRecipe for removal. Judging by
+    # the ids these are navigation, advertising and audio widgets (unverified)
+    remove_tags = [
+        dict(name='div', attrs={'id':['header','navsprite','topminibarad','headline_bar','shadow','footer']}),
+        dict(name='div', attrs={'class':'rightcolumn'}),
+        dict(name='table', attrs={'id':'audiobox'}),
+        dict(name='a', attrs={'title':['Home','Shows','Guests','Photos']}),
+        dict(name='iframe')
+    ]
+
+    feeds = [
+        ('Content Feed', u'http://feeds.feedburner.com/TheSeanHannityShow-AllContent?format=xml')
+    ]
+
+    #def print_version(self, url):
+        #parts=url.split('/')
+        #return url.replace(url, 'http://www.hannity.com/show/' + parts[4] + '/' + parts[5] + '/' + parts[6] + '?mode=print')
diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index cace547dda..5909f56c28 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -19,6 +19,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
         self.setupUi(self)
         self.db = db
         self.ids = [db.id(r) for r in rows]
+        self.groupBox.setTitle(_('Editing meta information for %d books') %
+                len(rows))
         self.write_series = False
         self.changed = False
         QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync)