Add profile for the New York Review of Books

2025-07-09 03:04:10 -04:00 · 2007-11-28 22:22:19 +00:00 · 2007-11-28 22:22:19 +00:00 · f97f1c91d2
commit f97f1c91d2
parent 2fea18e1c0
3 changed files with 31 additions and 2 deletions
--- a/src/libprs500/ebooks/lrf/web/convert_from.py
+++ b/src/libprs500/ebooks/lrf/web/convert_from.py
@ -28,8 +28,9 @@ from libprs500.ebooks.lrf.web.profiles.nytimes import NYTimes
 from libprs500.ebooks.lrf.web.profiles.bbc import BBC
 from libprs500.ebooks.lrf.web.profiles.newsweek import Newsweek
 from libprs500.ebooks.lrf.web.profiles.economist import Economist
 from libprs500.ebooks.lrf.web.profiles.newyorkreview import NewYorkReviewOfBooks
-builtin_profiles   = [NYTimes, BBC, Newsweek, Economist]
+builtin_profiles   = [NYTimes, BBC, Newsweek, Economist, NewYorkReviewOfBooks]
 available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles] 
 def option_parser():
--- a/src/libprs500/ebooks/lrf/web/profiles/newyorkreview.py
+++ b/src/libprs500/ebooks/lrf/web/profiles/newyorkreview.py
@ -0,0 +1,23 @@
 from libprs500.ebooks.lrf.web.profiles import DefaultProfile
 import re
 class NewYorkReviewOfBooks(DefaultProfile):
    '''
    Download profile for the New York Review of Books.

    Articles come from a single feedburner feed (see `get_feeds`). Each
    downloaded page is cleaned up with `preprocess_regexps` before conversion.
    '''

    title = 'New York Review of Books'
    # The settings below override DefaultProfile attributes; their exact
    # semantics are defined by DefaultProfile (not visible from this module).
    max_recursions = 2
    max_articles_per_feed = 50
    html_description = True
    no_stylesheets = True

    def get_feeds(self):
        '''Return the feeds to download as a list of (feed title, feed URL) tuples.'''
        return [ ('Current Issue',  'http://feeds.feedburner.com/nybooks') ]

    # List of (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively with DOTALL, so '.' also matches newlines
    # and a single pattern can span many lines of markup.
    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [
        # Rewrite the charset declared in the Content-Type meta tag to UTF-8.
        (r'<meta http-equiv="Content-Type" content="text/html; charset=(\S+)"', lambda match : match.group().replace(match.group(1), 'UTF-8')),
        # Drop everything from <body up to and including the first marker that
        # opens the article content.
        (r'<body.*?((<div id="article_body">)|(<div id="st-page-maincontent">)|(<div id="containermain">)|(<p class="ap-story-p">)|(<!-- img_nav -->))', lambda match: '<body><div>'),
        # Drop everything from the first end-of-article marker through </html>
        # and close the document properly.
        (r'((<!-- end article content -->)|(<div id="st-custom-afterpagecontent">)|(<p class="ap-story-p">&copy;)|(<div class="entry-footer">)|(<div id="see_also">)|(<p>Via <a href=)|(<div id="ss_nav">)).*?</html>', lambda match : '</div></body></html>'),
        # Replace an <a ... onclick=...> opening tag and whatever precedes the
        # image inside that link with just the <img> tag. The onclick attribute
        # must sit inside the anchor's own opening tag ([^>]*) and the image
        # must come before the link's closing </a>. With DOTALL, a plain '.*?'
        # here could start at an unrelated earlier '<a...' (or '<abbr', ...)
        # and swallow all the article text up to the image.
        (r'<a\s[^>]*?onclick[^>]*>(?:(?!</a>).)*?(<img .*?>)', lambda match: match.group(1)),
        # Remove the navigation block that precedes the next <h2> heading,
        # keeping the heading's opening tag.
        (r'<div class="nav">.*?<h2>', lambda match: '<h2>'),
        ] ]
--- a/src/libprs500/gui2/main.py
+++ b/src/libprs500/gui2/main.py
@ -119,12 +119,14 @@ class Main(MainWindow, Ui_MainWindow):
        nm.addAction(QIcon(':/images/news/bbc.png'), 'BBC')
        nm.addAction(QIcon(':/images/news/economist.png'), 'Economist')
        nm.addAction(QIcon(':/images/news/newsweek.png'), 'Newsweek')
        nm.addAction(QIcon(':/images/book.svg'), 'New York Review of Books')
        nm.addAction(QIcon(':/images/news/nytimes.png'), 'New York Times')
        QObject.connect(nm.actions()[0], SIGNAL('triggered(bool)'), self.fetch_news_bbc)
        QObject.connect(nm.actions()[1], SIGNAL('triggered(bool)'), self.fetch_news_economist)
        QObject.connect(nm.actions()[2], SIGNAL('triggered(bool)'), self.fetch_news_newsweek)
-        QObject.connect(nm.actions()[3], SIGNAL('triggered(bool)'), self.fetch_news_nytimes)
+        QObject.connect(nm.actions()[3], SIGNAL('triggered(bool)'), self.fetch_news_nyreview)
        QObject.connect(nm.actions()[4], SIGNAL('triggered(bool)'), self.fetch_news_nytimes)
        self.news_menu = nm
        self.action_news.setMenu(nm)
@ -538,6 +540,9 @@ class Main(MainWindow, Ui_MainWindow):
    def fetch_news_economist(self, checked):
        '''
        Handler for the Economist entry of the news menu.

        `checked` is the argument of the triggered(bool) signal and is not
        used. The work is delegated to `self.fetch_news`.
        '''
        # Presumably (profile name, display title) -- confirm against fetch_news.
        profile, display_title = 'economist', 'The Economist'
        self.fetch_news(profile, display_title)
    def fetch_news_nyreview(self, checked):
        '''
        Handler for the New York Review of Books entry of the news menu.

        `checked` is the argument of the triggered(bool) signal and is not
        used. The work is delegated to `self.fetch_news`.
        '''
        # Presumably (profile name, display title) -- confirm against fetch_news.
        profile, display_title = 'newyorkreview', 'New York Review of Books'
        self.fetch_news(profile, display_title)
    def fetch_news_nytimes(self, checked):
        d = PasswordDialog(self, 'nytimes info dialog', 
                           '<p>Please enter your username and password for nytimes.com<br>If you do not have an account, you can <a href="http://www.nytimes.com/gst/regi.html">register</a> for free.<br>Without a registration, some articles will not be downloaded correctly. Click OK to proceed.')