Merge from trunk

2025-07-09 03:04:10 -04:00 · 2013-04-12 10:26:18 +05:30 · 2013-04-12 10:26:18 +05:30 · fd50d44cc2
commit fd50d44cc2
parent f2fbd87e0f cac2d13289
12 changed files with 137 additions and 66 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -1,4 +1,4 @@
-# vim:fileencoding=UTF-8:ts=2:sw=2:sta:et:sts=2:ai
+# vim:fileencoding=utf-8:ts=2:sw=2:sta:et:sts=2:ai
 # Each release can have new features and bug fixes. Each of which
 # must have a title and can optionally have linked tickets and a description.
 # In addition they can have a type field which defaults to minor, but should be major
@ -20,6 +20,66 @@
 #   new recipes:
 #     - title: 

+- version: 0.9.27
+  date: 2013-04-12
+
+  new features:
+    - title: "Metadata download: Add two new sources for covers: Google Image Search and bigbooksearch.com."
+      description: "To enable them, go to Preferences->Metadata download and enable the 'Google Image' and 'Big Book Search' sources. Google Images is useful for finding larger covers as well as alternate versions of the cover. Big Book Search searches for alternate covers from amazon.com. It can occasionally find nicer covers than the direct Amazon source. Note that both these sources download multiple covers for a single book. Some of these covers can be wrong (i.e. they may be of a different book or not covers at all), so you should inspect the results and manually pick the best match. When bulk downloading, these sources are only used if the other sources find no covers."
+      type: major
+
+    - title: "Content server: Allow specifying a restriction to use for the server when embedding it as a WSGI app."
+      tickets: [1167951]
+
+    - title: "Get Books: Add a plugin for the Koobe Polish book store"
+
+    - title: "calibredb add_format: Add an option to not replace existing formats. Also pep8 compliance."
+
+    - title: "Allow restoring of the ORIGINAL_XXX format by right-clicking it in the book details panel"
+ 
+  bug fixes:
+    - title: "AZW3 Input: Do not fail to identify JPEG images with 8BIM headers created with Adobe Photoshop."
+      tickets: [1167985]
+
+    - title: "Amazon metadata download: Ignore Spanish edition entries when searching for a book on amazon.com"
+
+    - title: "TXT Input: When converting a txt file with a Byte Order Mark, remove the Byte Order Mark before further processing as it can cause the first line of the text to be mis-interpreted."
+
+    - title: "Get Books: Fix searching for current book/title/author by right-clicking the get books icon"
+
+    - title: "Get Books: Update nexto, gutenberg, and virtualo store plugins for website changes"
+
+    - title: "Amazon metadata download: When downloading from amazon.co.jp handle the 'Black curtain redirect' for adult titles."
+      tickets: [1165628]
+
+    - title: "When extracting zip files do not allow maliciously created zip files to overwrite other files on the system"
+
+    - title: "RTF Input: Handle RTF files with invalid border style specifications"
+      tickets: [1021270]
+
+  improved recipes:
+    - The Escapist
+    - San Francisco Chronicle
+    - The Onion
+    - Fronda
+    - Tom's Hardware
+    - New Yorker
+    - Financial Times UK
+    - Business Week Magazine
+    - Victoria Times
+    - tvxs
+    - The Independent
+
+  new recipes:
+    - title: Economia 
+      author: Manish Bhattarai
+
+    - title: Universe Today 
+      author: seird
+
+    - title: The Galaxy's Edge 
+      author: Krittika Goyal
+
 - version: 0.9.26
  date: 2013-04-05

--- a/manual/faq.rst
+++ b/manual/faq.rst
@ -802,6 +802,12 @@ Downloading from the Internet can sometimes result in a corrupted download. If t
    * Try temporarily disabling your antivirus program (Microsoft Security Essentials, or Kaspersky or Norton or McAfee or whatever). This is most likely the culprit if the upgrade process is hanging in the middle.
    * Try rebooting your computer and running a registry cleaner like `Wise registry cleaner <http://www.wisecleaner.com>`_.
    * Try downloading the installer with an alternate browser. For example if you are using Internet Explorer, try using Firefox or Chrome instead.
+    * If you get an error about a missing DLL on Windows, then most likely the
+      permissions on your temporary folder are incorrect. Go to the folder
+      :file:`C:\\Users\\USERNAME\\AppData\\Local` in Windows Explorer, then
+      right-click the :file:`Temp` folder, select :guilabel:`Properties` and go to
+      the :guilabel:`Security` tab. Make sure that your user account has full control
+      for this folder.
      
 If you still cannot get the installer to work and you are on windows, you can use the `calibre portable install <http://calibre-ebook.com/download_portable>`_, which does not need an installer (it is just a zip file).

--- a/recipes/independent.recipe
+++ b/recipes/independent.recipe
@ -41,6 +41,7 @@ class TheIndependentNew(BasicNewsRecipe):
    publication_type        = 'newspaper'
    masthead_url            = 'http://www.independent.co.uk/independent.co.uk/editorial/logo/independent_Masthead.png'
    encoding                = 'utf-8'
+    compress_news_images    = True
    remove_tags             =[
                               dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
                               dict(attrs={'class' : ['autoplay','openBiogPopup']}),
--- a/recipes/tvxs.recipe
+++ b/recipes/tvxs.recipe
@ -1,5 +1,6 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class TVXS(BasicNewsRecipe):
@ -8,19 +9,30 @@ class TVXS(BasicNewsRecipe):
    description            = 'News from Greece'
    max_articles_per_feed  = 100
    oldest_article         = 3
-    simultaneous_downloads = 1
    publisher              = 'TVXS'
-    category               = 'news, GR'
+    category               = 'news, sport, greece'
    language               = 'el'
    encoding               = None
    use_embedded_content = False
    remove_empty_feeds = True
-    #conversion_options     = { 'linearize_tables': True}
+    conversion_options = {'smarten_punctuation': True}
    no_stylesheets         = True
+    publication_type = 'newspaper'
    remove_tags_before     = dict(name='h1',attrs={'class':'print-title'})
    remove_tags_after      = dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'})
-    remove_attributes      = ['width', 'src', 'header', 'footer']
-
+    remove_tags      = [dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'}),
+                        dict(name='div',attrs={'class':'field field-type-filefield field-field-image-gallery'}),
+                        dict(name='div',attrs={'class':'filefield-file'})]
+    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
+    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
+                 table { width: 100%; } \
+                 td img { display: block; margin: 5px auto; } \
+                 ul { padding-top: 10px; } \
+                 ol { padding-top: 10px; } \
+                 li { padding-top: 5px; padding-bottom: 5px; } \
+                 h1 { text-align: center; font-size: 125%; font-weight: bold; } \
+                 h2, h3, h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
+    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''), (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]

    feeds = [(u'Ελλάδα', 'http://tvxs.gr/feeds/2/feed.xml'),
        (u'Κόσμος', 'http://tvxs.gr/feeds/5/feed.xml'),
@ -35,17 +47,10 @@ class TVXS(BasicNewsRecipe):
        (u'Ιστορία', 'http://tvxs.gr/feeds/1573/feed.xml'),
        (u'Χιούμορ', 'http://tvxs.gr/feeds/692/feed.xml')]

-
    def print_version(self, url):
-        import urllib2, urlparse, StringIO, gzip
-
-        fp = urllib2.urlopen(url)
-        data = fp.read()
-        if fp.info()['content-encoding'] == 'gzip':
-            gzip_data = StringIO.StringIO(data)
-            gzipper = gzip.GzipFile(fileobj=gzip_data)
-            data = gzipper.read()
-        fp.close()
+        br = self.get_browser()
+        response = br.open(url)
+        data = response.read()

        pos_1 = data.find('<a href="/print/')
        if pos_1 == -1:
@ -57,5 +62,5 @@ class TVXS(BasicNewsRecipe):
        pos_1 += len('<a href="')
        new_url = data[pos_1:pos_2]

-        print_url = urlparse.urljoin(url, new_url)
+        print_url = "http://tvxs.gr" + new_url
        return print_url
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 9, 26)
+numeric_version = (0, 9, 27)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -68,7 +68,6 @@ class Resource(object): # {{{
                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
                self.fragment = url[-1]

-
    def href(self, basedir=None):
        '''
        Return a URL pointing to this resource. If it is a file on the filesystem
@ -180,7 +179,6 @@ class ManifestItem(Resource): # {{{
            self.mime_type = val
        return property(fget=fget, fset=fset)

-
    def __unicode__(self):
        return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)

@ -190,7 +188,6 @@ class ManifestItem(Resource): # {{{
    def __repr__(self):
        return unicode(self)

-
    def __getitem__(self, index):
        if index == 0:
            return self.href()
@ -245,7 +242,6 @@ class Manifest(ResourceCollection): # {{{
        ResourceCollection.__init__(self)
        self.next_id = 1

-
    def item(self, id):
        for i in self:
            if i.id == id:
@ -309,13 +305,10 @@ class Spine(ResourceCollection): # {{{
                continue
        return s

-
-
    def __init__(self, manifest):
        ResourceCollection.__init__(self)
        self.manifest = manifest

-
    def replace(self, start, end, ids):
        '''
        Replace the items between start (inclusive) and end (not inclusive) with
@ -363,7 +356,6 @@ class Guide(ResourceCollection): # {{{
                ans += 'title="%s" '%self.title
            return ans + '/>'

-
    @staticmethod
    def from_opf_guide(references, base_dir=os.getcwdu()):
        coll = Guide()
@ -489,9 +481,9 @@ class OPF(object): # {{{
    MIMETYPE         = 'application/oebps-package+xml'
    PARSER           = etree.XMLParser(recover=True)
    NAMESPACES       = {
-                        None  : "http://www.idpf.org/2007/opf",
-                        'dc'  : "http://purl.org/dc/elements/1.1/",
-                        'opf' : "http://www.idpf.org/2007/opf",
+                        None: "http://www.idpf.org/2007/opf",
+                        'dc': "http://purl.org/dc/elements/1.1/",
+                        'opf': "http://www.idpf.org/2007/opf",
                       }
    META             = '{%s}meta' % NAMESPACES['opf']
    xpn = NAMESPACES.copy()
@ -501,9 +493,10 @@ class OPF(object): # {{{
    CONTENT          = XPath('self::*[re:match(name(), "meta$", "i")]/@content')
    TEXT             = XPath('string()')

-
    metadata_path   = XPath('descendant::*[re:match(name(), "metadata", "i")]')
-    metadata_elem_path = XPath('descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
+    metadata_elem_path = XPath(
+        'descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") '
+        'and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
    title_path      = XPath('descendant::*[re:match(name(), "title", "i")]')
    authors_path    = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut" or (not(@role) and not(@opf:role)))]')
    bkp_path        = XPath('descendant::*[re:match(name(), "contributor", "i") and (@role="bkp" or @opf:role="bkp")]')
@ -640,7 +633,8 @@ class OPF(object): # {{{
                    if 'toc' in item.href().lower():
                        toc = item.path

-            if toc is None: return
+            if toc is None:
+                return
            self.toc = TOC(base_path=self.base_dir)
            is_ncx = getattr(self, 'manifest', None) is not None and \
                     self.manifest.type_for_id(toc) is not None and \
@ -976,7 +970,6 @@ class OPF(object): # {{{

        return property(fget=fget, fset=fset)

-
    @dynamic_property
    def language(self):

@ -990,7 +983,6 @@ class OPF(object): # {{{

        return property(fget=fget, fset=fset)

-
    @dynamic_property
    def languages(self):

@ -1015,7 +1007,6 @@ class OPF(object): # {{{

        return property(fget=fget, fset=fset)

-
    @dynamic_property
    def book_producer(self):

@ -1196,7 +1187,6 @@ class OPFCreator(Metadata):
        if self.cover:
            self.guide.set_cover(self.cover)

-
    def create_manifest(self, entries):
        '''
        Create <manifest>
@ -1615,9 +1605,9 @@ def test_user_metadata():
    from cStringIO import StringIO
    mi = Metadata('Test title', ['test author1', 'test author2'])
    um = {
-        '#myseries': { '#value#': u'test series\xe4', 'datatype':'text',
+        '#myseries': {'#value#': u'test series\xe4', 'datatype':'text',
            'is_multiple': None, 'name': u'My Series'},
-        '#myseries_index': { '#value#': 2.45, 'datatype': 'float',
+        '#myseries_index': {'#value#': 2.45, 'datatype': 'float',
            'is_multiple': None},
        '#mytags': {'#value#':['t1','t2','t3'], 'datatype':'text',
            'is_multiple': '|', 'name': u'My Tags'}
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@ -51,9 +51,11 @@ def reverse_tag_iter(block):
    end = len(block)
    while True:
        pgt = block.rfind(b'>', 0, end)
-        if pgt == -1: break
+        if pgt == -1:
+            break
        plt = block.rfind(b'<', 0, pgt)
-        if plt == -1: break
+        if plt == -1:
+            break
        yield block[plt:pgt+1]
        end = plt

@ -231,12 +233,12 @@ class Mobi8Reader(object):
            flowpart = self.flows[j]
            nstr = '%04d' % j
            m = svg_tag_pattern.search(flowpart)
-            if m != None:
+            if m is not None:
                # svg
                typ = 'svg'
                start = m.start()
                m2 = image_tag_pattern.search(flowpart)
-                if m2 != None:
+                if m2 is not None:
                    format = 'inline'
                    dir = None
                    fname = None
@ -406,6 +408,10 @@ class Mobi8Reader(object):
            else:
                imgtype = what(None, data)
                if imgtype is None:
+                    from calibre.utils.magick.draw import identify_data
+                    try:
+                        imgtype = identify_data(data)[2]
+                    except Exception:
                        imgtype = 'unknown'
                href = 'images/%05d.%s'%(fname_idx, imgtype)
                with open(href.replace('/', os.sep), 'wb') as f:
--- a/src/calibre/ebooks/mobi/tweak.py
+++ b/src/calibre/ebooks/mobi/tweak.py
@ -72,7 +72,8 @@ def explode(path, dest, question=lambda x:True):
            dest), no_output=True)['result']

 def set_cover(oeb):
-    if 'cover' not in oeb.guide or oeb.metadata['cover']: return
+    if 'cover' not in oeb.guide or oeb.metadata['cover']:
+        return
    cover = oeb.guide['cover']
    if cover.href in oeb.manifest.hrefs:
        item = oeb.manifest.hrefs[cover.href]
@ -95,8 +96,9 @@ def rebuild(src_dir, dest_path):
    if not opf:
        raise ValueError('No OPF file found in %s'%src_dir)
    opf = opf[0]
-    # For debugging, uncomment the following line
-    # def fork_job(a, b, args=None, no_output=True): do_rebuild(*args)
+    # For debugging, uncomment the following two lines
+    # def fork_job(a, b, args=None, no_output=True):
+    #     do_rebuild(*args)
    fork_job('calibre.ebooks.mobi.tweak', 'do_rebuild', args=(opf, dest_path),
            no_output=True)

--- a/src/calibre/ebooks/mobi/writer2/resources.py
+++ b/src/calibre/ebooks/mobi/writer2/resources.py
@ -69,7 +69,8 @@ class Resources(object):
            cover_href = item.href

        for item in self.oeb.manifest.values():
-            if item.media_type not in OEB_RASTER_IMAGES: continue
+            if item.media_type not in OEB_RASTER_IMAGES:
+                continue
            try:
                data = self.process_image(item.data)
            except:
@ -116,8 +117,8 @@ class Resources(object):
        Add any images that were created after the call to add_resources()
        '''
        for item in self.oeb.manifest.values():
-            if (item.media_type not in OEB_RASTER_IMAGES or item.href in
-                    self.item_map): continue
+            if (item.media_type not in OEB_RASTER_IMAGES or item.href in self.item_map):
+                continue
            try:
                data = self.process_image(item.data)
            except:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -270,7 +270,7 @@ BINARY_MIME    = 'application/octet-stream'

 XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS

-OEB_STYLES        = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
+OEB_STYLES        = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css', 'xhtml/css'])
 OEB_DOCS          = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
                         'text/x-oeb-document'])
 OEB_RASTER_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME])
--- a/src/calibre/gui2/store/stores/koobe_plugin.py
+++ b/src/calibre/gui2/store/stores/koobe_plugin.py
@ -7,7 +7,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'

-import re
 import urllib
 from base64 import b64encode
 from contextlib import closing
--- a/src/calibre/utils/imghdr.py
+++ b/src/calibre/utils/imghdr.py
@ -24,7 +24,8 @@ def what(file, h=None):
            if res:
                return res
    finally:
-        if f: f.close()
+        if f:
+            f.close()
    return None


@ -38,7 +39,7 @@ def test_jpeg(h, f):
    """JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
    the original code was failing with some jpegs that included ICC_PROFILE
    data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
-    if (h[6:10] in (b'JFIF', b'Exif')) or (h[:2] == b'\xff\xd8' and b'JFIF' in h[:32]):
+    if (h[6:10] in (b'JFIF', b'Exif')) or (h[:2] == b'\xff\xd8' and (b'JFIF' in h[:32] or b'8BIM' in h[:32])):
        return 'jpeg'

 tests.append(test_jpeg)