From 1b6b234c59308ca0819e26ab32839836b36424a7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 23 Jun 2019 10:59:27 +0530
Subject: [PATCH] Various cleanups and fixes for the last py3 merge

---
 src/calibre/__init__.py                       |  8 ---
 src/calibre/ebooks/lrf/html/convert_from.py   | 72 ++++++++++---------
 src/calibre/ebooks/lrf/html/table.py          |  6 +-
 src/calibre/ebooks/lrf/lrs/convert_from.py    |  2 +-
 src/calibre/ebooks/lrf/objects.py             |  2 -
 src/calibre/ebooks/lrf/pylrs/pylrf.py         |  2 +-
 src/calibre/ebooks/lrf/pylrs/pylrs.py         |  8 +--
 src/calibre/ebooks/metadata/__init__.py       | 10 +--
 src/calibre/ebooks/metadata/archive.py        |  4 +-
 src/calibre/ebooks/metadata/book/__init__.py  | 41 +++++------
 src/calibre/ebooks/metadata/book/base.py      |  8 +--
 src/calibre/ebooks/metadata/epub.py           | 29 ++++----
 src/calibre/ebooks/metadata/imp.py            |  2 +-
 src/calibre/ebooks/metadata/kdl.py            |  2 +-
 src/calibre/ebooks/metadata/lit.py            |  2 +-
 src/calibre/ebooks/metadata/meta.py           | 20 +++---
 src/calibre/ebooks/metadata/mobi.py           | 33 ++++-----
 src/calibre/ebooks/metadata/odt.py            |  4 +-
 src/calibre/ebooks/metadata/pml.py            |  2 +-
 src/calibre/ebooks/metadata/sources/google.py |  4 +-
 src/calibre/ebooks/metadata/toc.py            | 10 +--
 src/calibre/ebooks/metadata/txt.py            |  6 +-
 22 files changed, 132 insertions(+), 145 deletions(-)
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 9a232564be..92240d1f77 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -249,14 +249,6 @@ def load_library(name, cdll):
     return cdll.LoadLibrary(name+'.so')
 
 
-def filename_to_utf8(name):
-    '''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
-    if isinstance(name, unicode_type):
-        return name.encode('utf8')
-    codec = 'cp1252' if iswindows else 'utf8'
-    return name.decode(codec, 'replace').encode('utf8')
-
-
 def extract(path, dir):
     extractor = None
     # First use the file header to identify its type
diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index 7a774b456b..59472c1451 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -1,19 +1,46 @@
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2008, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+import copy
+import glob
+import os
+import re
+import sys
+import tempfile
+from collections import deque
+from functools import partial
+from itertools import chain
+from math import ceil, floor
+
+from calibre import (
+    __appname__, entity_to_unicode, fit_image, force_unicode, preferred_encoding
+)
+from calibre.constants import filesystem_encoding
+from calibre.devices.interface import DevicePlugin as Device
+from calibre.ebooks import ConversionError
+from calibre.ebooks.BeautifulSoup import (
+    BeautifulSoup, Comment, Declaration, NavigableString, ProcessingInstruction, Tag
+)
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.lrf import Book
+from calibre.ebooks.lrf.html.color_map import lrs_color
+from calibre.ebooks.lrf.html.table import Table
+from calibre.ebooks.lrf.pylrs.pylrs import (
+    CR, BlockSpace, BookSetting, Canvas, CharButton, DropCaps, EmpLine, Image,
+    ImageBlock, ImageStream, Italic, JumpButton, LrsError, Paragraph, Plot,
+    RuledLine, Span, Sub, Sup, TextBlock
+)
+from calibre.ptempfile import PersistentTemporaryFile
+from polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
+from polyglot.urllib import unquote, urlparse
+
 """
 Code to convert HTML ebooks into LRF ebooks.
 
 I am indebted to esperanc for the initial CSS->Xylog Style conversion code
 and to Falstaff for pylrs.
 """
-import os, re, sys, copy, glob, tempfile
-from collections import deque
-from math import ceil, floor
-from functools import partial
-from polyglot.builtins import string_or_bytes, itervalues, getcwd
-from itertools import chain
 
 try:
     from PIL import Image as PILImage
@@ -21,25 +48,6 @@ try:
 except ImportError:
     import Image as PILImage
 
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag, \
-                            NavigableString, Declaration, ProcessingInstruction
-from calibre.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
-                TextBlock, ImageBlock, JumpButton, CharButton, \
-                Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
-                LrsError, Sup, Sub, EmpLine
-from calibre.ebooks.lrf.pylrs.pylrs import Span
-from calibre.ebooks.lrf import Book
-from calibre.ebooks import ConversionError
-from calibre.ebooks.lrf.html.table import Table
-from calibre import filename_to_utf8, __appname__, \
-                    fit_image, preferred_encoding, entity_to_unicode
-from calibre.ptempfile import PersistentTemporaryFile
-from calibre.devices.interface import DevicePlugin as Device
-from calibre.ebooks.lrf.html.color_map import lrs_color
-from calibre.ebooks.chardet import xml_to_unicode
-from polyglot.builtins import unicode_type
-from polyglot.urllib import unquote, urlparse
-
 
 def update_css(ncss, ocss):
     for key in ncss.keys():
@@ -577,7 +585,7 @@ class HTMLConverter(object):
         css = self.tag_css(tag)[0]
         if ('display' in css and css['display'].lower() == 'none') or ('visibility' in css and css['visibility'].lower() == 'hidden'):
             return ''
-        text, alt_text = u'', u''
+        text, alt_text = '', ''
         for c in tag.contents:
             if limit is not None and len(text) > limit:
                 break
@@ -1112,7 +1120,7 @@ class HTMLConverter(object):
             val /= 2.
             ans['sidemargin'] = int(val)
         if 2*int(ans['sidemargin']) >= factor*int(self.current_block.blockStyle.attrs['blockwidth']):
-            ans['sidemargin'] = (factor*int(self.current_block.blockStyle.attrs['blockwidth'])) // 2
+            ans['sidemargin'] = int((factor*int(self.current_block.blockStyle.attrs['blockwidth'])) / 2)
 
         for prop in ('topskip', 'footskip', 'sidemargin'):
             if isinstance(ans[prop], string_or_bytes):
@@ -1348,7 +1356,7 @@ class HTMLConverter(object):
         ''' Ensure padding and text-indent properties are respected '''
         text_properties = self.text_properties(tag_css)
         block_properties = self.block_properties(tag_css)
-        indent = (float(text_properties['parindent'])//10) * (self.profile.dpi/72)
+        indent = (float(text_properties['parindent'])/10) * (self.profile.dpi/72)
         margin = float(block_properties['sidemargin'])
         # Since we're flattening the block structure, we need to ensure that text
         # doesn't go off the left edge of the screen
@@ -1780,7 +1788,7 @@ class HTMLConverter(object):
             else:
                 if xpos > 65535:
                     xpos = 65535
-                canvases[-1].put_object(block, xpos + delta//2, ypos)
+                canvases[-1].put_object(block, xpos + int(delta/2), ypos)
 
         for canvas in canvases:
             self.current_page.append(canvas)
@@ -1802,7 +1810,7 @@ class HTMLConverter(object):
 
 def process_file(path, options, logger):
     path = os.path.abspath(path)
-    default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
+    default_title = force_unicode(os.path.splitext(os.path.basename(path))[0], filesystem_encoding)
     dirpath = os.path.dirname(path)
 
     tpath = ''
diff --git a/src/calibre/ebooks/lrf/html/table.py b/src/calibre/ebooks/lrf/html/table.py
index 8caa576749..3542056988 100644
--- a/src/calibre/ebooks/lrf/html/table.py
+++ b/src/calibre/ebooks/lrf/html/table.py
@@ -8,7 +8,7 @@ from calibre.ebooks.lrf.fonts import get_font
 from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
                                              CharButton, Plot, Paragraph, \
                                              LrsTextTag
-from polyglot.builtins import string_or_bytes, range
+from polyglot.builtins import string_or_bytes, range, native_string_type
 
 
 def ceil(num):
@@ -17,8 +17,8 @@ def ceil(num):
 
 def print_xml(elem):
     from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter
-    elem = elem.toElement('utf8')
-    ew = ElementWriter(elem, sourceEncoding='utf8')
+    elem = elem.toElement(native_string_type('utf8'))
+    ew = ElementWriter(elem, sourceEncoding=native_string_type('utf8'))
     ew.write(sys.stdout)
     print()
 
diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py
index 1350addb60..cb53ad23ce 100644
--- a/src/calibre/ebooks/lrf/lrs/convert_from.py
+++ b/src/calibre/ebooks/lrf/lrs/convert_from.py
@@ -221,7 +221,7 @@ class LrsParser(object):
                 res = cls.tag_to_string(item)
                 if res:
                     strings.append(res)
-        return u''.join(strings)
+        return ''.join(strings)
 
     def first_pass(self):
         info = self.soup.find('bbebxylog').find('bookinformation').find('info')
diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py
index 59fc725c0c..67b0b77642 100644
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@@ -933,8 +933,6 @@ class Text(LRFStream):
             if isinstance(c, unicode_type):
                 s += c
             elif c is None:
-                if c.name == 'P':
-                    in_p = False
                 p = open_containers.pop()
                 s += p.close_html()
             else:
diff --git a/src/calibre/ebooks/lrf/pylrs/pylrf.py b/src/calibre/ebooks/lrf/pylrs/pylrf.py
index eea5010d59..bca86f29b3 100644
--- a/src/calibre/ebooks/lrf/pylrs/pylrf.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrf.py
@@ -645,7 +645,7 @@ class LrfWriter(object):
         self.tocObjId = 0
         self.docInfoXml = ""
         self.thumbnailEncoding = "JPEG"
-        self.thumbnailData = ""
+        self.thumbnailData = b""
         self.objects = []
         self.objectTable = []
 
diff --git a/src/calibre/ebooks/lrf/pylrs/pylrs.py b/src/calibre/ebooks/lrf/pylrs/pylrs.py
index 84cc3432fa..8816cbc311 100644
--- a/src/calibre/ebooks/lrf/pylrs/pylrs.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py
@@ -49,12 +49,12 @@ from .pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
         STREAM_FORCE_COMPRESSED)
 from calibre.utils.date import isoformat
 
-DEFAULT_SOURCE_ENCODING = "cp1252"      # defualt is us-windows character set
+DEFAULT_SOURCE_ENCODING = "cp1252"      # default is us-windows character set
 DEFAULT_GENREADING      = "fs"          # default is yes to both lrf and lrs
 
 from calibre import __appname__, __version__
 from calibre import entity_to_unicode
-from polyglot.builtins import string_or_bytes, unicode_type, iteritems
+from polyglot.builtins import string_or_bytes, unicode_type, iteritems, native_string_type
 
 
 class LrsError(Exception):
@@ -620,7 +620,7 @@ class Book(Delegator):
 
         _formatXml(root)
         tree = ElementTree(element=root)
-        tree.write(f, encoding=outputEncodingName, xml_declaration=True)
+        tree.write(f, encoding=native_string_type(outputEncodingName), xml_declaration=True)
 
 
 class BookInformation(Delegator):
@@ -672,7 +672,7 @@ class Info(Delegator):
         # NB: generates an encoding attribute, which lrs2lrf does not
         tree = ElementTree(element=info)
         f = io.BytesIO()
-        tree.write(f, encoding='utf-8', xml_declaration=True)
+        tree.write(f, encoding=native_string_type('utf-8'), xml_declaration=True)
         xmlInfo = f.getvalue().decode('utf-8')
         xmlInfo = re.sub(r"<CThumbnail.*?>\n", "", xmlInfo)
         xmlInfo = xmlInfo.replace("SumPage>", "Page>")
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index 37b9502265..158cc9108c 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -13,13 +13,13 @@ import os, sys, re
 
 from calibre import relpath, guess_type, prints, force_unicode
 from calibre.utils.config_base import tweaks
-from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues
+from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues, as_unicode
 from polyglot.urllib import quote, unquote, urlparse
 
 
 try:
     _author_pat = re.compile(tweaks['authors_split_regex'])
-except:
+except Exception:
     prints('Author split regexp:', tweaks['authors_split_regex'],
             'is invalid, using default')
     _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
@@ -270,7 +270,7 @@ class Resource(object):
         if self.path is None:
             return self._href
         f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
-        frag = '#'+quote(f) if self.fragment else ''
+        frag = '#'+as_unicode(quote(f)) if self.fragment else ''
         if self.path == basedir:
             return ''+frag
         try:
@@ -279,7 +279,7 @@ class Resource(object):
             rpath = self.path
         if isinstance(rpath, unicode_type):
             rpath = rpath.encode('utf-8')
-        return quote(rpath.replace(os.sep, '/'))+frag
+        return as_unicode(quote(rpath.replace(os.sep, '/')))+frag
 
     def set_basedir(self, path):
         self._basedir = path
@@ -436,5 +436,5 @@ def rating_to_stars(value, allow_half_stars=False, star='★', half='½'):
     r = max(0, min(int(value or 0), 10))
     ans = star * (r // 2)
     if allow_half_stars and r % 2:
-            ans += half
+        ans += half
     return ans
diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py
index 56414ba0a3..434edf9728 100644
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@@ -34,7 +34,7 @@ def archive_type(stream):
         ans = 'rar'
     try:
         stream.seek(pos)
-    except:
+    except Exception:
         pass
     return ans
 
@@ -144,7 +144,7 @@ def get_comic_book_info(d, mi, series_index='volume'):
             dt = date(puby, 6 if pubm is None else pubm, 15)
             dt = parse_only_date(unicode_type(dt))
             mi.pubdate = dt
-        except:
+        except Exception:
             pass
 
 
diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py
index d1f3c4ff4d..84e8039ab5 100644
--- a/src/calibre/ebooks/metadata/book/__init__.py
+++ b/src/calibre/ebooks/metadata/book/__init__.py
@@ -11,7 +11,7 @@ All fields must have a NULL value represented as None for simple types,
 an empty list/dictionary for complex types and (None, None) for cover_data
 '''
 
-SOCIAL_METADATA_FIELDS = frozenset([
+SOCIAL_METADATA_FIELDS = frozenset((
     'tags',             # Ordered list
     'rating',           # A floating point number between 0 and 10
     'comments',         # A simple HTML enabled string
@@ -20,17 +20,17 @@ SOCIAL_METADATA_FIELDS = frozenset([
     # Of the form { scheme1:value1, scheme2:value2}
     # For example: {'isbn':'123456789', 'doi':'xxxx', ... }
     'identifiers',
-])
+))
 
 '''
 The list of names that convert to identifiers when in get and set.
 '''
 
-TOP_LEVEL_IDENTIFIERS = frozenset([
+TOP_LEVEL_IDENTIFIERS = frozenset((
     'isbn',
-])
+))
 
-PUBLICATION_METADATA_FIELDS = frozenset([
+PUBLICATION_METADATA_FIELDS = frozenset((
     'title',            # title must never be None. Should be _('Unknown')
     # Pseudo field that can be set, but if not set is auto generated
     # from title and languages
@@ -59,28 +59,27 @@ PUBLICATION_METADATA_FIELDS = frozenset([
     # image_path which is the path to an image file, encoded
     # in filesystem_encoding
     'thumbnail',
-    ])
+))
 
-BOOK_STRUCTURE_FIELDS = frozenset([
+BOOK_STRUCTURE_FIELDS = frozenset((
     # These are used by code, Null values are None.
     'toc', 'spine', 'guide', 'manifest',
-    ])
+))
 
-USER_METADATA_FIELDS = frozenset([
+USER_METADATA_FIELDS = frozenset((
     # A dict of dicts similar to field_metadata. Each field description dict
     # also contains a value field with the key #value#.
     'user_metadata',
-])
+))
 
-DEVICE_METADATA_FIELDS = frozenset([
+DEVICE_METADATA_FIELDS = frozenset((
     'device_collections',   # Ordered list of strings
     'lpath',                # Unicode, / separated
     'size',                 # In bytes
     'mime',                 # Mimetype of the book file being represented
+))
 
-])
-
-CALIBRE_METADATA_FIELDS = frozenset([
+CALIBRE_METADATA_FIELDS = frozenset((
     'application_id',   # An application id, currently set to the db_id.
     'db_id',            # the calibre primary key of the item.
     'formats',          # list of formats (extensions) for this book
@@ -89,9 +88,7 @@ CALIBRE_METADATA_FIELDS = frozenset([
     'user_categories',
     # a dict of author to an associated hyperlink
     'author_link_map',
-
-    ]
-)
+))
 
 ALL_METADATA_FIELDS =      SOCIAL_METADATA_FIELDS.union(
                            PUBLICATION_METADATA_FIELDS).union(
@@ -108,13 +105,13 @@ STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
                            CALIBRE_METADATA_FIELDS)
 
 # Metadata fields that smart update must do special processing to copy.
-SC_FIELDS_NOT_COPIED =     frozenset(['title', 'title_sort', 'authors',
+SC_FIELDS_NOT_COPIED =     frozenset(('title', 'title_sort', 'authors',
                                       'author_sort', 'author_sort_map',
                                       'cover_data', 'tags', 'languages',
-                                      'identifiers'])
+                                      'identifiers'))
 
 # Metadata fields that smart update should copy only if the source is not None
-SC_FIELDS_COPY_NOT_NULL =  frozenset(['device_collections', 'lpath', 'size', 'comments', 'thumbnail'])
+SC_FIELDS_COPY_NOT_NULL =  frozenset(('device_collections', 'lpath', 'size', 'comments', 'thumbnail'))
 
 # Metadata fields that smart update should copy without special handling
 SC_COPYABLE_FIELDS =       SOCIAL_METADATA_FIELDS.union(
@@ -130,6 +127,6 @@ SERIALIZABLE_FIELDS =      SOCIAL_METADATA_FIELDS.union(
                            PUBLICATION_METADATA_FIELDS).union(
                            CALIBRE_METADATA_FIELDS).union(
                            DEVICE_METADATA_FIELDS) - \
-                           frozenset(['device_collections', 'formats',
-                               'cover_data'])
+                           frozenset(('device_collections', 'formats',
+                               'cover_data'))
 # these are rebuilt when needed
diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index ff59edaff1..87eba2d388 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -531,11 +531,11 @@ class Metadata(object):
 
             if getattr(other, 'cover_data', False):
                 other_cover = other.cover_data[-1]
-                self_cover = self.cover_data[-1] if self.cover_data else ''
+                self_cover = self.cover_data[-1] if self.cover_data else b''
                 if not self_cover:
-                    self_cover = ''
+                    self_cover = b''
                 if not other_cover:
-                    other_cover = ''
+                    other_cover = b''
                 if len(other_cover) > len(self_cover):
                     self.cover_data = other.cover_data
 
@@ -595,7 +595,7 @@ class Metadata(object):
         v = self.series_index if val is None else val
         try:
             x = float(v)
-        except (ValueError, TypeError):
+        except Exception:
             x = 1
         return fmt_sidx(x)
 
diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py
index 153ce4cee9..3773e53fec 100644
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -93,7 +93,7 @@ class OCFReader(OCF):
 
     def __init__(self):
         try:
-            mimetype = self.open('mimetype').read().decode('utf-8').rstrip()
+            mimetype = self.read_bytes('mimetype').decode('utf-8').rstrip()
             if mimetype != OCF.MIMETYPE:
                 print('WARNING: Invalid mimetype declaration', mimetype)
         except:
@@ -123,9 +123,8 @@ class OCFReader(OCF):
     def encryption_meta(self):
         if self._encryption_meta_cached is None:
             try:
-                with closing(self.open(self.ENCRYPTION_PATH)) as f:
-                    self._encryption_meta_cached = Encryption(f.read())
-            except:
+                self._encryption_meta_cached = Encryption(self.read_bytes(self.ENCRYPTION_PATH))
+            except Exception:
                 self._encryption_meta_cached = Encryption(None)
         return self._encryption_meta_cached
 
@@ -152,7 +151,7 @@ class OCFZipReader(OCFReader):
                 self.root = getcwd()
         super(OCFZipReader, self).__init__()
 
-    def open(self, name, mode='r'):
+    def open(self, name):
         if isinstance(self.archive, LocalZipFile):
             return self.archive.open(name)
         return io.BytesIO(self.archive.read(name))
@@ -164,7 +163,7 @@ class OCFZipReader(OCFReader):
 def get_zip_reader(stream, root=None):
     try:
         zf = ZipFile(stream, mode='r')
-    except:
+    except Exception:
         stream.seek(0)
         zf = LocalZipFile(stream)
     return OCFZipReader(zf, root=root)
@@ -176,8 +175,12 @@ class OCFDirReader(OCFReader):
         self.root = path
         super(OCFDirReader, self).__init__()
 
-    def open(self, path, *args, **kwargs):
-        return open(os.path.join(self.root, path), *args, **kwargs)
+    def open(self, path):
+        return lopen(os.path.join(self.root, path), 'rb')
+
+    def read_bytes(self, path):
+        with self.open(path) as f:
+            return f.read()
 
 
 def render_cover(cpage, zf, reader=None):
@@ -238,15 +241,9 @@ def get_cover(raster_cover, first_spine_item, reader):
         if reader.encryption_meta.is_encrypted(raster_cover):
             return
         try:
-            member = zf.getinfo(raster_cover)
+            return reader.read_bytes(raster_cover)
         except Exception:
             pass
-        else:
-            f = zf.open(member)
-            data = f.read()
-            f.close()
-            zf.close()
-            return data
 
     return render_cover(first_spine_item, zf, reader=reader)
 
@@ -326,5 +323,5 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_ide
         if cpath is not None:
             replacements[cpath].close()
             os.remove(replacements[cpath].name)
-    except:
+    except Exception:
         pass
diff --git a/src/calibre/ebooks/metadata/imp.py b/src/calibre/ebooks/metadata/imp.py
index cfd0bed7b2..7dc0378475 100644
--- a/src/calibre/ebooks/metadata/imp.py
+++ b/src/calibre/ebooks/metadata/imp.py
@@ -9,7 +9,7 @@ import sys
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from polyglot.builtins import unicode_type
 
-MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG']
+MAGIC = (b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG')
 
 
 def get_metadata(stream):
diff --git a/src/calibre/ebooks/metadata/kdl.py b/src/calibre/ebooks/metadata/kdl.py
index 97249e0613..8d023f1326 100644
--- a/src/calibre/ebooks/metadata/kdl.py
+++ b/src/calibre/ebooks/metadata/kdl.py
@@ -79,7 +79,7 @@ def get_series(title, authors, timeout=60):
         raw = raw.partition('.')[0].strip()
         try:
             mi.series_index = int(raw)
-        except:
+        except Exception:
             pass
     return mi
 
diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py
index ae568ce0f7..cd197f9941 100644
--- a/src/calibre/ebooks/metadata/lit.py
+++ b/src/calibre/ebooks/metadata/lit.py
@@ -32,7 +32,7 @@ def get_metadata(stream):
                 try:
                     covers.append((litfile.get_file('/data/'+item.internal),
                                    ctype))
-                except:
+                except Exception:
                     pass
                 break
     covers.sort(key=lambda x: len(x[0]), reverse=True)
diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py
index 57e520ad00..f1dcc5d7c2 100644
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -13,18 +13,16 @@ from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from polyglot.builtins import getcwd, unicode_type
 
-_METADATA_PRIORITIES = [
-                       'html', 'htm', 'xhtml', 'xhtm',
-                       'rtf', 'fb2', 'pdf', 'prc', 'odt',
-                       'epub', 'lit', 'lrx', 'lrf', 'mobi',
-                       'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb'
-                      ]
-
 # The priorities for loading metadata from different file types
 # Higher values should be used to update metadata from lower values
 METADATA_PRIORITIES = collections.defaultdict(lambda:0)
-for i, ext in enumerate(_METADATA_PRIORITIES):
-    METADATA_PRIORITIES[ext] = i
+for i, ext in enumerate((
+    'html', 'htm', 'xhtml', 'xhtm',
+    'rtf', 'fb2', 'pdf', 'prc', 'odt',
+    'epub', 'lit', 'lrx', 'lrf', 'mobi',
+    'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb'
+)):
+    METADATA_PRIORITIES[ext] = i + 1
 
 
 def path_to_ext(path):
@@ -59,7 +57,7 @@ def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
                                      force_read_metadata=force_read_metadata,
                                      pattern=pattern)
                 mi.smart_update(newmi)
-            except:
+            except Exception:
                 continue
             if getattr(mi, 'application_id', None) is not None:
                 return mi
@@ -219,7 +217,7 @@ def opf_metadata(opfpath):
                         data = f.read()
                     mi.cover_data = (fmt, data)
             return mi
-    except:
+    except Exception:
         import traceback
         traceback.print_exc()
         pass
diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py
index 28eb029fe1..c419695520 100644
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -1,25 +1,26 @@
-'''
-Retrieve and modify in-place Mobipocket book metadata.
-'''
-
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2009, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-__license__   = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
-    'Marshall T. Vandegrift <llasram@gmail.com>'
-__docformat__ = 'restructuredtext en'
-
-import os, numbers, io
+import io
+import numbers
+import os
 from struct import pack, unpack
 
 from calibre.ebooks import normalize
-from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
-from calibre.ebooks.mobi.utils import rescale_image
+from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MobiError
 from calibre.ebooks.mobi.langcodes import iana2mobi
+from calibre.ebooks.mobi.utils import rescale_image
 from calibre.utils.date import now as nowf
 from calibre.utils.imghdr import what
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
-from polyglot.builtins import unicode_type, range, codepoint_to_chr
+from polyglot.builtins import codepoint_to_chr, range, unicode_type
+
+
+'''
+Retrieve and modify in-place Mobipocket book metadata.
+'''
 
 
 def is_image(ss):
@@ -142,7 +143,7 @@ class MetadataUpdater(object):
         ''' Fetch the DRM keys '''
         drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
         self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
-        drm_keys = ''
+        drm_keys = b''
         for x in range(self.drm_key_count):
             base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
             drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
@@ -234,7 +235,7 @@ class MetadataUpdater(object):
         mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
         if mobi_header_length == 0xe4:
             # Patch mobi_header_length to 0xE8
-            self.record0[0x17] = "\xe8"
+            self.record0[0x17] = b"\xe8"
             self.record0[0xf4:0xf8] = pack('>L', 0xFFFFFFFF)
             mobi_header_length = 0xe8
 
@@ -397,7 +398,7 @@ class MetadataUpdater(object):
                 self.original_exth_records.get(501, None) == 'EBOK' and
                 not added_501 and not share_not_sync):
             from uuid import uuid4
-            update_exth_record((113, unicode_type(uuid4())))
+            update_exth_record((113, unicode_type(uuid4()).encode(self.codec)))
         # Add a 112 record with actual UUID
         if getattr(mi, 'uuid', None):
             update_exth_record((112,
diff --git a/src/calibre/ebooks/metadata/odt.py b/src/calibre/ebooks/metadata/odt.py
index cb0d2e23e0..d5c5060ffd 100644
--- a/src/calibre/ebooks/metadata/odt.py
+++ b/src/calibre/ebooks/metadata/odt.py
@@ -205,7 +205,7 @@ def get_metadata(stream, extract_cover=True):
             if data.get('opf.seriesindex', ''):
                 try:
                     mi.series_index = float(data['opf.seriesindex'])
-                except ValueError:
+                except Exception:
                     mi.series_index = 1.0
         if data.get('opf.language', ''):
             cl = canonicalize_lang(data['opf.language'])
@@ -215,7 +215,7 @@ def get_metadata(stream, extract_cover=True):
     if not opfnocover:
         try:
             read_cover(stream, zin, mi, opfmeta, extract_cover)
-        except:
+        except Exception:
             pass  # Do not let an error reading the cover prevent reading other data
 
     return mi
diff --git a/src/calibre/ebooks/metadata/pml.py b/src/calibre/ebooks/metadata/pml.py
index 8f579074d9..24b1108f0b 100644
--- a/src/calibre/ebooks/metadata/pml.py
+++ b/src/calibre/ebooks/metadata/pml.py
@@ -74,7 +74,7 @@ def get_cover(name, tdir, top_level=False):
         cover_path = os.path.join(tdir, name + '_img', 'cover.png') if os.path.exists(os.path.join(tdir, name + '_img', 'cover.png')) else os.path.join(
             os.path.join(tdir, 'images'), 'cover.png') if os.path.exists(os.path.join(os.path.join(tdir, 'images'), 'cover.png')) else ''
     if cover_path:
-        with open(cover_path, 'r+b') as cstream:
+        with open(cover_path, 'rb') as cstream:
             cover_data = cstream.read()
 
     return ('png', cover_data)
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 0f7a42e71c..7853c6153a 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -222,8 +222,10 @@ class GoogleBooks(Source):
 
         if not q:
             return None
+        if not isinstance(q, bytes):
+            q = q.encode('utf-8')
         return BASE_URL + urlencode({
-            'q': q.encode('utf-8'),
+            'q': q,
             'max-results': 20,
             'start-index': 1,
             'min-viewability': 'none',
diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py
index c19f4e35a8..1e1968be2a 100644
--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@@ -18,14 +18,8 @@ from polyglot.urllib import unquote, urlparse
 
 NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
 CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
-NSMAP = {
-            None: NCX_NS,
-            'calibre':CALIBRE_NS
-            }
-
-
+NSMAP = {None: NCX_NS, 'calibre':CALIBRE_NS}
 E = ElementMaker(namespace=NCX_NS, nsmap=NSMAP)
-
 C = ElementMaker(namespace=CALIBRE_NS, nsmap=NSMAP)
 
 
@@ -209,7 +203,7 @@ class TOC(list):
             nl = nl_path(np)
             if nl:
                 nl = nl[0]
-                text = u''
+                text = ''
                 for txt in txt_path(nl):
                     text += etree.tostring(txt, method='text',
                             encoding='unicode', with_tail=False)
diff --git a/src/calibre/ebooks/metadata/txt.py b/src/calibre/ebooks/metadata/txt.py
index 1a5c30bcb4..253d844de0 100644
--- a/src/calibre/ebooks/metadata/txt.py
+++ b/src/calibre/ebooks/metadata/txt.py
@@ -23,15 +23,15 @@ def get_metadata(stream, extract_cover=True):
     mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
     stream.seek(0)
 
-    mdata = u''
+    mdata = ''
     for x in range(0, 4):
         line = stream.readline().decode('utf-8', 'replace')
-        if line == '':
+        if not line:
             break
         else:
             mdata += line
 
-    mdata = mdata[:100]
+    mdata = mdata[:1024]
 
     mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
     if mo is not None: