Pull from trunk

2025-07-09 03:04:10 -04:00 · 2010-05-02 10:28:50 -06:00 · 2010-05-02 10:28:50 -06:00 · 8153b33522
commit 8153b33522
parent a8cc638c6f 8dc171ee3a
4 changed files with 47 additions and 12 deletions
--- a/resources/recipes/jpost.recipe
+++ b/resources/recipes/jpost.recipe
@ -1,3 +1,4 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class JerusalemPost(BasicNewsRecipe):
@ -10,8 +11,6 @@ class JerusalemPost(BasicNewsRecipe):
    __author__ = 'Kovid Goyal'
    max_articles_per_feed = 10
    no_stylesheets = True
-    remove_tags_before = {'class':'jp-grid-content'}
-    remove_tags_after = {'id':'body_val'}

    feeds =  [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
               ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
@ -20,9 +19,24 @@ class JerusalemPost(BasicNewsRecipe):
               ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
          ]

+    remove_tags = [
+            dict(id=lambda x: x and 'ads.' in x),
+            dict(attrs={'class':['printinfo', 'tt1']}),
+            dict(onclick='DoPrint()'),
+            dict(name='input'),
+            ]
+
+    conversion_options = {'linearize_tables':True}
+
    def preprocess_html(self, soup):
-        for x in soup.findAll(name=['form', 'input']):
-            x.name = 'div'
-        for x in soup.findAll('body', style=True):
-            del x['style']
+        for tag in soup.findAll('form'):
+            tag.name = 'div'
        return soup
+
+    def print_version(self, url):
+        m = re.search(r'(ID|id)=(\d+)', url)
+        if m is not None:
+            id_ = m.group(2)
+            return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s'%id_
+        return url
+
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
 Input plugin for HTML or OPF ebooks.
 '''

-import os, re, sys, uuid
+import os, re, sys, uuid, tempfile
 from urlparse import urlparse, urlunparse
 from urllib import unquote
 from functools import partial
@ -272,6 +272,7 @@ class HTMLInput(InputFormatPlugin):

    def convert(self, stream, opts, file_ext, log,
                accelerators):
+        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts

@ -290,6 +291,15 @@ class HTMLInput(InputFormatPlugin):
        return create_oebbook(log, stream.name, opts, self,
                encoding=opts.input_encoding)

+    def is_case_sensitive(self, path):
+        if self._is_case_sensitive is not None:
+            return self._is_case_sensitive
+        if not path or not os.path.exists(path):
+            return islinux or isfreebsd
+        self._is_case_sensitive = os.path.exists(path.lower()) \
+                and os.path.exists(path.upper())
+        return self._is_case_sensitive
+
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import DirContainer, \
@ -343,14 +353,16 @@ class HTMLInput(InputFormatPlugin):

        self.added_resources = {}
        self.log = log
+        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
-            if not (islinux or isfreebsd):
+            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

+        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
@ -415,7 +427,7 @@ class HTMLInput(InputFormatPlugin):
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
-        if not (islinux or isfreebsd):
+        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
--- a/src/calibre/gui2/dialogs/scheduler.py
+++ b/src/calibre/gui2/dialogs/scheduler.py
@ -220,6 +220,10 @@ class Scheduler(QObject):
        self.cac = QAction(QIcon(I('user_profile.svg')), _('Add a custom news source'), self)
        self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
        self.news_menu.addAction(self.cac)
+        self.news_menu.addSeparator()
+        self.all_action = self.news_menu.addAction(
+                _('Download all scheduled new sources'),
+                self.download_all_scheduled)

        self.timer = QTimer(self)
        self.timer.start(int(self.INTERVAL * 60000))
@ -304,7 +308,11 @@ class Scheduler(QObject):
        if urn is not None:
            return self.download(urn)
        for urn in self.recipe_model.scheduled_urns():
-            self.download(urn)
+            if not self.download(urn):
+                break
+
+    def download_all_scheduled(self):
+        self.download_clicked(None)

    def download(self, urn):
        self.lock.lock()
@ -316,12 +324,13 @@ class Scheduler(QObject):
                            'is active'))
                d.setModal(False)
                d.show()
-            return
+            return False
        self.internet_connection_failed = False
        doit = urn not in self.download_queue
        self.lock.unlock()
        if doit:
            self.do_download(urn)
+        return True

    def check(self):
        recipes = self.recipe_model.get_to_be_downloaded_recipes()
--- a/src/calibre/manual/plugins.rst
+++ b/src/calibre/manual/plugins.rst
@ -113,7 +113,7 @@ Metadata download plugins
    When :meth:`fetch` is called, the `self` object will have the following
    useful attributes (each of which may be None)::

-        title, author, publisher, isbn, log, verbose and extra
+        title, book_author, publisher, isbn, log, verbose and extra

    Use these attributes to construct the search query. extra is reserved for
    future use.