Various cleanups and fixes for the last py3 merge

2025-07-09 03:04:10 -04:00 · 2019-06-23 10:59:27 +05:30 · 2019-06-23 10:59:27 +05:30 · 1b6b234c59
commit 1b6b234c59
parent 2c1a1813ce
22 changed files with 132 additions and 145 deletions
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -249,14 +249,6 @@ def load_library(name, cdll):
    return cdll.LoadLibrary(name+'.so')


-def filename_to_utf8(name):
-    '''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
-    if isinstance(name, unicode_type):
-        return name.encode('utf8')
-    codec = 'cp1252' if iswindows else 'utf8'
-    return name.decode(codec, 'replace').encode('utf8')
-
-
 def extract(path, dir):
    extractor = None
    # First use the file header to identify its type
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -1,19 +1,46 @@
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2008, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import absolute_import, division, print_function, unicode_literals

-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+import copy
+import glob
+import os
+import re
+import sys
+import tempfile
+from collections import deque
+from functools import partial
+from itertools import chain
+from math import ceil, floor
+
+from calibre import (
+    __appname__, entity_to_unicode, fit_image, force_unicode, preferred_encoding
+)
+from calibre.constants import filesystem_encoding
+from calibre.devices.interface import DevicePlugin as Device
+from calibre.ebooks import ConversionError
+from calibre.ebooks.BeautifulSoup import (
+    BeautifulSoup, Comment, Declaration, NavigableString, ProcessingInstruction, Tag
+)
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.lrf import Book
+from calibre.ebooks.lrf.html.color_map import lrs_color
+from calibre.ebooks.lrf.html.table import Table
+from calibre.ebooks.lrf.pylrs.pylrs import (
+    CR, BlockSpace, BookSetting, Canvas, CharButton, DropCaps, EmpLine, Image,
+    ImageBlock, ImageStream, Italic, JumpButton, LrsError, Paragraph, Plot,
+    RuledLine, Span, Sub, Sup, TextBlock
+)
+from calibre.ptempfile import PersistentTemporaryFile
+from polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
+from polyglot.urllib import unquote, urlparse
+
 """
 Code to convert HTML ebooks into LRF ebooks.

 I am indebted to esperanc for the initial CSS->Xylog Style conversion code
 and to Falstaff for pylrs.
 """
-import os, re, sys, copy, glob, tempfile
-from collections import deque
-from math import ceil, floor
-from functools import partial
-from polyglot.builtins import string_or_bytes, itervalues, getcwd
-from itertools import chain

 try:
    from PIL import Image as PILImage
@@ -21,25 +48,6 @@ try:
 except ImportError:
    import Image as PILImage

-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag, \
-                            NavigableString, Declaration, ProcessingInstruction
-from calibre.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
-                TextBlock, ImageBlock, JumpButton, CharButton, \
-                Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
-                LrsError, Sup, Sub, EmpLine
-from calibre.ebooks.lrf.pylrs.pylrs import Span
-from calibre.ebooks.lrf import Book
-from calibre.ebooks import ConversionError
-from calibre.ebooks.lrf.html.table import Table
-from calibre import filename_to_utf8, __appname__, \
-                    fit_image, preferred_encoding, entity_to_unicode
-from calibre.ptempfile import PersistentTemporaryFile
-from calibre.devices.interface import DevicePlugin as Device
-from calibre.ebooks.lrf.html.color_map import lrs_color
-from calibre.ebooks.chardet import xml_to_unicode
-from polyglot.builtins import unicode_type
-from polyglot.urllib import unquote, urlparse
-

 def update_css(ncss, ocss):
    for key in ncss.keys():
@@ -577,7 +585,7 @@ class HTMLConverter(object):
        css = self.tag_css(tag)[0]
        if ('display' in css and css['display'].lower() == 'none') or ('visibility' in css and css['visibility'].lower() == 'hidden'):
            return ''
-        text, alt_text = u'', u''
+        text, alt_text = '', ''
        for c in tag.contents:
            if limit is not None and len(text) > limit:
                break
@@ -1112,7 +1120,7 @@ class HTMLConverter(object):
            val /= 2.
            ans['sidemargin'] = int(val)
        if 2*int(ans['sidemargin']) >= factor*int(self.current_block.blockStyle.attrs['blockwidth']):
-            ans['sidemargin'] = (factor*int(self.current_block.blockStyle.attrs['blockwidth'])) // 2
+            ans['sidemargin'] = int((factor*int(self.current_block.blockStyle.attrs['blockwidth'])) / 2)

        for prop in ('topskip', 'footskip', 'sidemargin'):
            if isinstance(ans[prop], string_or_bytes):
@@ -1348,7 +1356,7 @@ class HTMLConverter(object):
        ''' Ensure padding and text-indent properties are respected '''
        text_properties = self.text_properties(tag_css)
        block_properties = self.block_properties(tag_css)
-        indent = (float(text_properties['parindent'])//10) * (self.profile.dpi/72)
+        indent = (float(text_properties['parindent'])/10) * (self.profile.dpi/72)
        margin = float(block_properties['sidemargin'])
        # Since we're flattening the block structure, we need to ensure that text
        # doesn't go off the left edge of the screen
@@ -1780,7 +1788,7 @@ class HTMLConverter(object):
            else:
                if xpos > 65535:
                    xpos = 65535
-                canvases[-1].put_object(block, xpos + delta//2, ypos)
+                canvases[-1].put_object(block, xpos + int(delta/2), ypos)

        for canvas in canvases:
            self.current_page.append(canvas)
@@ -1802,7 +1810,7 @@ class HTMLConverter(object):

 def process_file(path, options, logger):
    path = os.path.abspath(path)
-    default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
+    default_title = force_unicode(os.path.splitext(os.path.basename(path))[0], filesystem_encoding)
    dirpath = os.path.dirname(path)

    tpath = ''
--- a/src/calibre/ebooks/lrf/html/table.py
+++ b/src/calibre/ebooks/lrf/html/table.py
@@ -8,7 +8,7 @@ from calibre.ebooks.lrf.fonts import get_font
 from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
                                             CharButton, Plot, Paragraph, \
                                             LrsTextTag
-from polyglot.builtins import string_or_bytes, range
+from polyglot.builtins import string_or_bytes, range, native_string_type


 def ceil(num):
@@ -17,8 +17,8 @@ def ceil(num):

 def print_xml(elem):
    from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter
-    elem = elem.toElement('utf8')
-    ew = ElementWriter(elem, sourceEncoding='utf8')
+    elem = elem.toElement(native_string_type('utf8'))
+    ew = ElementWriter(elem, sourceEncoding=native_string_type('utf8'))
    ew.write(sys.stdout)
    print()

--- a/src/calibre/ebooks/lrf/lrs/convert_from.py
+++ b/src/calibre/ebooks/lrf/lrs/convert_from.py
@@ -221,7 +221,7 @@ class LrsParser(object):
                res = cls.tag_to_string(item)
                if res:
                    strings.append(res)
-        return u''.join(strings)
+        return ''.join(strings)

    def first_pass(self):
        info = self.soup.find('bbebxylog').find('bookinformation').find('info')
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@@ -933,8 +933,6 @@ class Text(LRFStream):
            if isinstance(c, unicode_type):
                s += c
            elif c is None:
-                if c.name == 'P':
-                    in_p = False
                p = open_containers.pop()
                s += p.close_html()
            else:
--- a/src/calibre/ebooks/lrf/pylrs/pylrf.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrf.py
@@ -645,7 +645,7 @@ class LrfWriter(object):
        self.tocObjId = 0
        self.docInfoXml = ""
        self.thumbnailEncoding = "JPEG"
-        self.thumbnailData = ""
+        self.thumbnailData = b""
        self.objects = []
        self.objectTable = []

--- a/src/calibre/ebooks/lrf/pylrs/pylrs.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py
@@ -49,12 +49,12 @@ from .pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
        STREAM_FORCE_COMPRESSED)
 from calibre.utils.date import isoformat

-DEFAULT_SOURCE_ENCODING = "cp1252"      # defualt is us-windows character set
+DEFAULT_SOURCE_ENCODING = "cp1252"      # default is us-windows character set
 DEFAULT_GENREADING      = "fs"          # default is yes to both lrf and lrs

 from calibre import __appname__, __version__
 from calibre import entity_to_unicode
-from polyglot.builtins import string_or_bytes, unicode_type, iteritems
+from polyglot.builtins import string_or_bytes, unicode_type, iteritems, native_string_type


 class LrsError(Exception):
@@ -620,7 +620,7 @@ class Book(Delegator):

        _formatXml(root)
        tree = ElementTree(element=root)
-        tree.write(f, encoding=outputEncodingName, xml_declaration=True)
+        tree.write(f, encoding=native_string_type(outputEncodingName), xml_declaration=True)


 class BookInformation(Delegator):
@@ -672,7 +672,7 @@ class Info(Delegator):
        # NB: generates an encoding attribute, which lrs2lrf does not
        tree = ElementTree(element=info)
        f = io.BytesIO()
-        tree.write(f, encoding='utf-8', xml_declaration=True)
+        tree.write(f, encoding=native_string_type('utf-8'), xml_declaration=True)
        xmlInfo = f.getvalue().decode('utf-8')
        xmlInfo = re.sub(r"<CThumbnail.*?>\n", "", xmlInfo)
        xmlInfo = xmlInfo.replace("SumPage>", "Page>")
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -13,13 +13,13 @@ import os, sys, re

 from calibre import relpath, guess_type, prints, force_unicode
 from calibre.utils.config_base import tweaks
-from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues
+from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues, as_unicode
 from polyglot.urllib import quote, unquote, urlparse


 try:
    _author_pat = re.compile(tweaks['authors_split_regex'])
-except:
+except Exception:
    prints('Author split regexp:', tweaks['authors_split_regex'],
            'is invalid, using default')
    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
@@ -270,7 +270,7 @@ class Resource(object):
        if self.path is None:
            return self._href
        f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
-        frag = '#'+quote(f) if self.fragment else ''
+        frag = '#'+as_unicode(quote(f)) if self.fragment else ''
        if self.path == basedir:
            return ''+frag
        try:
@@ -279,7 +279,7 @@ class Resource(object):
            rpath = self.path
        if isinstance(rpath, unicode_type):
            rpath = rpath.encode('utf-8')
-        return quote(rpath.replace(os.sep, '/'))+frag
+        return as_unicode(quote(rpath.replace(os.sep, '/')))+frag

    def set_basedir(self, path):
        self._basedir = path
@@ -436,5 +436,5 @@ def rating_to_stars(value, allow_half_stars=False, star='★', half='½'):
    r = max(0, min(int(value or 0), 10))
    ans = star * (r // 2)
    if allow_half_stars and r % 2:
-            ans += half
+        ans += half
    return ans
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@@ -34,7 +34,7 @@ def archive_type(stream):
        ans = 'rar'
    try:
        stream.seek(pos)
-    except:
+    except Exception:
        pass
    return ans

@@ -144,7 +144,7 @@ def get_comic_book_info(d, mi, series_index='volume'):
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(unicode_type(dt))
            mi.pubdate = dt
-        except:
+        except Exception:
            pass


--- a/src/calibre/ebooks/metadata/book/__init__.py
+++ b/src/calibre/ebooks/metadata/book/__init__.py
@@ -11,7 +11,7 @@ All fields must have a NULL value represented as None for simple types,
 an empty list/dictionary for complex types and (None, None) for cover_data
 '''

-SOCIAL_METADATA_FIELDS = frozenset([
+SOCIAL_METADATA_FIELDS = frozenset((
    'tags',             # Ordered list
    'rating',           # A floating point number between 0 and 10
    'comments',         # A simple HTML enabled string
@@ -20,17 +20,17 @@ SOCIAL_METADATA_FIELDS = frozenset([
    # Of the form { scheme1:value1, scheme2:value2}
    # For example: {'isbn':'123456789', 'doi':'xxxx', ... }
    'identifiers',
-])
+))

 '''
 The list of names that convert to identifiers when in get and set.
 '''

-TOP_LEVEL_IDENTIFIERS = frozenset([
+TOP_LEVEL_IDENTIFIERS = frozenset((
    'isbn',
-])
+))

-PUBLICATION_METADATA_FIELDS = frozenset([
+PUBLICATION_METADATA_FIELDS = frozenset((
    'title',            # title must never be None. Should be _('Unknown')
    # Pseudo field that can be set, but if not set is auto generated
    # from title and languages
@@ -59,28 +59,27 @@ PUBLICATION_METADATA_FIELDS = frozenset([
    # image_path which is the path to an image file, encoded
    # in filesystem_encoding
    'thumbnail',
-    ])
+))

-BOOK_STRUCTURE_FIELDS = frozenset([
+BOOK_STRUCTURE_FIELDS = frozenset((
    # These are used by code, Null values are None.
    'toc', 'spine', 'guide', 'manifest',
-    ])
+))

-USER_METADATA_FIELDS = frozenset([
+USER_METADATA_FIELDS = frozenset((
    # A dict of dicts similar to field_metadata. Each field description dict
    # also contains a value field with the key #value#.
    'user_metadata',
-])
+))

-DEVICE_METADATA_FIELDS = frozenset([
+DEVICE_METADATA_FIELDS = frozenset((
    'device_collections',   # Ordered list of strings
    'lpath',                # Unicode, / separated
    'size',                 # In bytes
    'mime',                 # Mimetype of the book file being represented
+))

-])
-
-CALIBRE_METADATA_FIELDS = frozenset([
+CALIBRE_METADATA_FIELDS = frozenset((
    'application_id',   # An application id, currently set to the db_id.
    'db_id',            # the calibre primary key of the item.
    'formats',          # list of formats (extensions) for this book
@@ -89,9 +88,7 @@ CALIBRE_METADATA_FIELDS = frozenset([
    'user_categories',
    # a dict of author to an associated hyperlink
    'author_link_map',
-
-    ]
-)
+))

 ALL_METADATA_FIELDS =      SOCIAL_METADATA_FIELDS.union(
                           PUBLICATION_METADATA_FIELDS).union(
@@ -108,13 +105,13 @@ STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
                           CALIBRE_METADATA_FIELDS)

 # Metadata fields that smart update must do special processing to copy.
-SC_FIELDS_NOT_COPIED =     frozenset(['title', 'title_sort', 'authors',
+SC_FIELDS_NOT_COPIED =     frozenset(('title', 'title_sort', 'authors',
                                      'author_sort', 'author_sort_map',
                                      'cover_data', 'tags', 'languages',
-                                      'identifiers'])
+                                      'identifiers'))

 # Metadata fields that smart update should copy only if the source is not None
-SC_FIELDS_COPY_NOT_NULL =  frozenset(['device_collections', 'lpath', 'size', 'comments', 'thumbnail'])
+SC_FIELDS_COPY_NOT_NULL =  frozenset(('device_collections', 'lpath', 'size', 'comments', 'thumbnail'))

 # Metadata fields that smart update should copy without special handling
 SC_COPYABLE_FIELDS =       SOCIAL_METADATA_FIELDS.union(
@@ -130,6 +127,6 @@ SERIALIZABLE_FIELDS =      SOCIAL_METADATA_FIELDS.union(
                           PUBLICATION_METADATA_FIELDS).union(
                           CALIBRE_METADATA_FIELDS).union(
                           DEVICE_METADATA_FIELDS) - \
-                           frozenset(['device_collections', 'formats',
-                               'cover_data'])
+                           frozenset(('device_collections', 'formats',
+                               'cover_data'))
 # these are rebuilt when needed
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -531,11 +531,11 @@ class Metadata(object):

            if getattr(other, 'cover_data', False):
                other_cover = other.cover_data[-1]
-                self_cover = self.cover_data[-1] if self.cover_data else ''
+                self_cover = self.cover_data[-1] if self.cover_data else b''
                if not self_cover:
-                    self_cover = ''
+                    self_cover = b''
                if not other_cover:
-                    other_cover = ''
+                    other_cover = b''
                if len(other_cover) > len(self_cover):
                    self.cover_data = other.cover_data

@@ -595,7 +595,7 @@ class Metadata(object):
        v = self.series_index if val is None else val
        try:
            x = float(v)
-        except (ValueError, TypeError):
+        except Exception:
            x = 1
        return fmt_sidx(x)

--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -93,7 +93,7 @@ class OCFReader(OCF):

    def __init__(self):
        try:
-            mimetype = self.open('mimetype').read().decode('utf-8').rstrip()
+            mimetype = self.read_bytes('mimetype').decode('utf-8').rstrip()
            if mimetype != OCF.MIMETYPE:
                print('WARNING: Invalid mimetype declaration', mimetype)
        except:
@@ -123,9 +123,8 @@ class OCFReader(OCF):
    def encryption_meta(self):
        if self._encryption_meta_cached is None:
            try:
-                with closing(self.open(self.ENCRYPTION_PATH)) as f:
-                    self._encryption_meta_cached = Encryption(f.read())
-            except:
+                self._encryption_meta_cached = Encryption(self.read_bytes(self.ENCRYPTION_PATH))
+            except Exception:
                self._encryption_meta_cached = Encryption(None)
        return self._encryption_meta_cached

@@ -152,7 +151,7 @@ class OCFZipReader(OCFReader):
                self.root = getcwd()
        super(OCFZipReader, self).__init__()

-    def open(self, name, mode='r'):
+    def open(self, name):
        if isinstance(self.archive, LocalZipFile):
            return self.archive.open(name)
        return io.BytesIO(self.archive.read(name))
@@ -164,7 +163,7 @@ class OCFZipReader(OCFReader):
 def get_zip_reader(stream, root=None):
    try:
        zf = ZipFile(stream, mode='r')
-    except:
+    except Exception:
        stream.seek(0)
        zf = LocalZipFile(stream)
    return OCFZipReader(zf, root=root)
@@ -176,8 +175,12 @@ class OCFDirReader(OCFReader):
        self.root = path
        super(OCFDirReader, self).__init__()

-    def open(self, path, *args, **kwargs):
-        return open(os.path.join(self.root, path), *args, **kwargs)
+    def open(self, path):
+        return lopen(os.path.join(self.root, path), 'rb')
+
+    def read_bytes(self, path):
+        with self.open(path) as f:
+            return f.read()


 def render_cover(cpage, zf, reader=None):
@@ -238,15 +241,9 @@ def get_cover(raster_cover, first_spine_item, reader):
        if reader.encryption_meta.is_encrypted(raster_cover):
            return
        try:
-            member = zf.getinfo(raster_cover)
+            return reader.read_bytes(raster_cover)
        except Exception:
            pass
-        else:
-            f = zf.open(member)
-            data = f.read()
-            f.close()
-            zf.close()
-            return data

    return render_cover(first_spine_item, zf, reader=reader)

@@ -326,5 +323,5 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_ide
        if cpath is not None:
            replacements[cpath].close()
            os.remove(replacements[cpath].name)
-    except:
+    except Exception:
        pass
--- a/src/calibre/ebooks/metadata/imp.py
+++ b/src/calibre/ebooks/metadata/imp.py
@@ -9,7 +9,7 @@ import sys
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from polyglot.builtins import unicode_type

-MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG']
+MAGIC = (b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG')


 def get_metadata(stream):
--- a/src/calibre/ebooks/metadata/kdl.py
+++ b/src/calibre/ebooks/metadata/kdl.py
@@ -79,7 +79,7 @@ def get_series(title, authors, timeout=60):
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
-        except:
+        except Exception:
            pass
    return mi

--- a/src/calibre/ebooks/metadata/lit.py
+++ b/src/calibre/ebooks/metadata/lit.py
@@ -32,7 +32,7 @@ def get_metadata(stream):
                try:
                    covers.append((litfile.get_file('/data/'+item.internal),
                                   ctype))
-                except:
+                except Exception:
                    pass
                break
    covers.sort(key=lambda x: len(x[0]), reverse=True)
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -13,18 +13,16 @@ from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from polyglot.builtins import getcwd, unicode_type

-_METADATA_PRIORITIES = [
-                       'html', 'htm', 'xhtml', 'xhtm',
-                       'rtf', 'fb2', 'pdf', 'prc', 'odt',
-                       'epub', 'lit', 'lrx', 'lrf', 'mobi',
-                       'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb'
-                      ]
-
 # The priorities for loading metadata from different file types
 # Higher values should be used to update metadata from lower values
 METADATA_PRIORITIES = collections.defaultdict(lambda:0)
-for i, ext in enumerate(_METADATA_PRIORITIES):
-    METADATA_PRIORITIES[ext] = i
+for i, ext in enumerate((
+    'html', 'htm', 'xhtml', 'xhtm',
+    'rtf', 'fb2', 'pdf', 'prc', 'odt',
+    'epub', 'lit', 'lrx', 'lrf', 'mobi',
+    'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb'
+)):
+    METADATA_PRIORITIES[ext] = i + 1


 def path_to_ext(path):
@@ -59,7 +57,7 @@ def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
-            except:
+            except Exception:
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi
@@ -219,7 +217,7 @@ def opf_metadata(opfpath):
                        data = f.read()
                    mi.cover_data = (fmt, data)
            return mi
-    except:
+    except Exception:
        import traceback
        traceback.print_exc()
        pass
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -1,25 +1,26 @@
-'''
-Retrieve and modify in-place Mobipocket book metadata.
-'''
-
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2009, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import absolute_import, division, print_function, unicode_literals

-__license__   = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
-    'Marshall T. Vandegrift <llasram@gmail.com>'
-__docformat__ = 'restructuredtext en'
-
-import os, numbers, io
+import io
+import numbers
+import os
 from struct import pack, unpack

 from calibre.ebooks import normalize
-from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
-from calibre.ebooks.mobi.utils import rescale_image
+from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MobiError
 from calibre.ebooks.mobi.langcodes import iana2mobi
+from calibre.ebooks.mobi.utils import rescale_image
 from calibre.utils.date import now as nowf
 from calibre.utils.imghdr import what
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
-from polyglot.builtins import unicode_type, range, codepoint_to_chr
+from polyglot.builtins import codepoint_to_chr, range, unicode_type
+
+
+'''
+Retrieve and modify in-place Mobipocket book metadata.
+'''


 def is_image(ss):
@@ -142,7 +143,7 @@ class MetadataUpdater(object):
        ''' Fetch the DRM keys '''
        drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
        self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
-        drm_keys = ''
+        drm_keys = b''
        for x in range(self.drm_key_count):
            base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
            drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
@@ -234,7 +235,7 @@ class MetadataUpdater(object):
        mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
        if mobi_header_length == 0xe4:
            # Patch mobi_header_length to 0xE8
-            self.record0[0x17] = "\xe8"
+            self.record0[0x17] = b"\xe8"
            self.record0[0xf4:0xf8] = pack('>L', 0xFFFFFFFF)
            mobi_header_length = 0xe8

@@ -397,7 +398,7 @@ class MetadataUpdater(object):
                self.original_exth_records.get(501, None) == 'EBOK' and
                not added_501 and not share_not_sync):
            from uuid import uuid4
-            update_exth_record((113, unicode_type(uuid4())))
+            update_exth_record((113, unicode_type(uuid4()).encode(self.codec)))
        # Add a 112 record with actual UUID
        if getattr(mi, 'uuid', None):
            update_exth_record((112,
--- a/src/calibre/ebooks/metadata/odt.py
+++ b/src/calibre/ebooks/metadata/odt.py
@@ -205,7 +205,7 @@ def get_metadata(stream, extract_cover=True):
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
-                except ValueError:
+                except Exception:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
@@ -215,7 +215,7 @@ def get_metadata(stream, extract_cover=True):
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
-        except:
+        except Exception:
            pass  # Do not let an error reading the cover prevent reading other data

    return mi
--- a/src/calibre/ebooks/metadata/pml.py
+++ b/src/calibre/ebooks/metadata/pml.py
@@ -74,7 +74,7 @@ def get_cover(name, tdir, top_level=False):
        cover_path = os.path.join(tdir, name + '_img', 'cover.png') if os.path.exists(os.path.join(tdir, name + '_img', 'cover.png')) else os.path.join(
            os.path.join(tdir, 'images'), 'cover.png') if os.path.exists(os.path.join(os.path.join(tdir, 'images'), 'cover.png')) else ''
    if cover_path:
-        with open(cover_path, 'r+b') as cstream:
+        with open(cover_path, 'rb') as cstream:
            cover_data = cstream.read()

    return ('png', cover_data)
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -222,8 +222,10 @@ class GoogleBooks(Source):

        if not q:
            return None
+        if not isinstance(q, bytes):
+            q = q.encode('utf-8')
        return BASE_URL + urlencode({
-            'q': q.encode('utf-8'),
+            'q': q,
            'max-results': 20,
            'start-index': 1,
            'min-viewability': 'none',
--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@@ -18,14 +18,8 @@ from polyglot.urllib import unquote, urlparse

 NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
 CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
-NSMAP = {
-            None: NCX_NS,
-            'calibre':CALIBRE_NS
-            }
-
-
+NSMAP = {None: NCX_NS, 'calibre':CALIBRE_NS}
 E = ElementMaker(namespace=NCX_NS, nsmap=NSMAP)
-
 C = ElementMaker(namespace=CALIBRE_NS, nsmap=NSMAP)


@@ -209,7 +203,7 @@ class TOC(list):
            nl = nl_path(np)
            if nl:
                nl = nl[0]
-                text = u''
+                text = ''
                for txt in txt_path(nl):
                    text += etree.tostring(txt, method='text',
                            encoding='unicode', with_tail=False)
--- a/src/calibre/ebooks/metadata/txt.py
+++ b/src/calibre/ebooks/metadata/txt.py
@@ -23,15 +23,15 @@ def get_metadata(stream, extract_cover=True):
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

-    mdata = u''
+    mdata = ''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
-        if line == '':
+        if not line:
            break
        else:
            mdata += line

-    mdata = mdata[:100]
+    mdata = mdata[:1024]

    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None: