From 1b6b234c59308ca0819e26ab32839836b36424a7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 23 Jun 2019 10:59:27 +0530 Subject: [PATCH] Various cleanups and fixes for the last py3 merge --- src/calibre/__init__.py | 8 --- src/calibre/ebooks/lrf/html/convert_from.py | 72 ++++++++++--------- src/calibre/ebooks/lrf/html/table.py | 6 +- src/calibre/ebooks/lrf/lrs/convert_from.py | 2 +- src/calibre/ebooks/lrf/objects.py | 2 - src/calibre/ebooks/lrf/pylrs/pylrf.py | 2 +- src/calibre/ebooks/lrf/pylrs/pylrs.py | 8 +-- src/calibre/ebooks/metadata/__init__.py | 10 +-- src/calibre/ebooks/metadata/archive.py | 4 +- src/calibre/ebooks/metadata/book/__init__.py | 41 +++++------ src/calibre/ebooks/metadata/book/base.py | 8 +-- src/calibre/ebooks/metadata/epub.py | 29 ++++---- src/calibre/ebooks/metadata/imp.py | 2 +- src/calibre/ebooks/metadata/kdl.py | 2 +- src/calibre/ebooks/metadata/lit.py | 2 +- src/calibre/ebooks/metadata/meta.py | 20 +++--- src/calibre/ebooks/metadata/mobi.py | 33 ++++----- src/calibre/ebooks/metadata/odt.py | 4 +- src/calibre/ebooks/metadata/pml.py | 2 +- src/calibre/ebooks/metadata/sources/google.py | 4 +- src/calibre/ebooks/metadata/toc.py | 10 +-- src/calibre/ebooks/metadata/txt.py | 6 +- 22 files changed, 132 insertions(+), 145 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 9a232564be..92240d1f77 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -249,14 +249,6 @@ def load_library(name, cdll): return cdll.LoadLibrary(name+'.so') -def filename_to_utf8(name): - '''Return C{name} encoded in utf8. Unhandled characters are replaced. 
''' - if isinstance(name, unicode_type): - return name.encode('utf8') - codec = 'cp1252' if iswindows else 'utf8' - return name.decode(codec, 'replace').encode('utf8') - - def extract(path, dir): extractor = None # First use the file header to identify its type diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 7a774b456b..59472c1451 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -1,19 +1,46 @@ +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2008, Kovid Goyal from __future__ import absolute_import, division, print_function, unicode_literals -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' +import copy +import glob +import os +import re +import sys +import tempfile +from collections import deque +from functools import partial +from itertools import chain +from math import ceil, floor + +from calibre import ( + __appname__, entity_to_unicode, fit_image, force_unicode, preferred_encoding +) +from calibre.constants import filesystem_encoding +from calibre.devices.interface import DevicePlugin as Device +from calibre.ebooks import ConversionError +from calibre.ebooks.BeautifulSoup import ( + BeautifulSoup, Comment, Declaration, NavigableString, ProcessingInstruction, Tag +) +from calibre.ebooks.chardet import xml_to_unicode +from calibre.ebooks.lrf import Book +from calibre.ebooks.lrf.html.color_map import lrs_color +from calibre.ebooks.lrf.html.table import Table +from calibre.ebooks.lrf.pylrs.pylrs import ( + CR, BlockSpace, BookSetting, Canvas, CharButton, DropCaps, EmpLine, Image, + ImageBlock, ImageStream, Italic, JumpButton, LrsError, Paragraph, Plot, + RuledLine, Span, Sub, Sup, TextBlock +) +from calibre.ptempfile import PersistentTemporaryFile +from polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type +from polyglot.urllib import unquote, urlparse + """ Code to convert HTML ebooks into LRF ebooks. 
I am indebted to esperanc for the initial CSS->Xylog Style conversion code and to Falstaff for pylrs. """ -import os, re, sys, copy, glob, tempfile -from collections import deque -from math import ceil, floor -from functools import partial -from polyglot.builtins import string_or_bytes, itervalues, getcwd -from itertools import chain try: from PIL import Image as PILImage @@ -21,25 +48,6 @@ try: except ImportError: import Image as PILImage -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag, \ - NavigableString, Declaration, ProcessingInstruction -from calibre.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \ - TextBlock, ImageBlock, JumpButton, CharButton, \ - Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \ - LrsError, Sup, Sub, EmpLine -from calibre.ebooks.lrf.pylrs.pylrs import Span -from calibre.ebooks.lrf import Book -from calibre.ebooks import ConversionError -from calibre.ebooks.lrf.html.table import Table -from calibre import filename_to_utf8, __appname__, \ - fit_image, preferred_encoding, entity_to_unicode -from calibre.ptempfile import PersistentTemporaryFile -from calibre.devices.interface import DevicePlugin as Device -from calibre.ebooks.lrf.html.color_map import lrs_color -from calibre.ebooks.chardet import xml_to_unicode -from polyglot.builtins import unicode_type -from polyglot.urllib import unquote, urlparse - def update_css(ncss, ocss): for key in ncss.keys(): @@ -577,7 +585,7 @@ class HTMLConverter(object): css = self.tag_css(tag)[0] if ('display' in css and css['display'].lower() == 'none') or ('visibility' in css and css['visibility'].lower() == 'hidden'): return '' - text, alt_text = u'', u'' + text, alt_text = '', '' for c in tag.contents: if limit is not None and len(text) > limit: break @@ -1112,7 +1120,7 @@ class HTMLConverter(object): val /= 2. 
ans['sidemargin'] = int(val) if 2*int(ans['sidemargin']) >= factor*int(self.current_block.blockStyle.attrs['blockwidth']): - ans['sidemargin'] = (factor*int(self.current_block.blockStyle.attrs['blockwidth'])) // 2 + ans['sidemargin'] = int((factor*int(self.current_block.blockStyle.attrs['blockwidth'])) / 2) for prop in ('topskip', 'footskip', 'sidemargin'): if isinstance(ans[prop], string_or_bytes): @@ -1348,7 +1356,7 @@ class HTMLConverter(object): ''' Ensure padding and text-indent properties are respected ''' text_properties = self.text_properties(tag_css) block_properties = self.block_properties(tag_css) - indent = (float(text_properties['parindent'])//10) * (self.profile.dpi/72) + indent = (float(text_properties['parindent'])/10) * (self.profile.dpi/72) margin = float(block_properties['sidemargin']) # Since we're flattening the block structure, we need to ensure that text # doesn't go off the left edge of the screen @@ -1780,7 +1788,7 @@ class HTMLConverter(object): else: if xpos > 65535: xpos = 65535 - canvases[-1].put_object(block, xpos + delta//2, ypos) + canvases[-1].put_object(block, xpos + int(delta/2), ypos) for canvas in canvases: self.current_page.append(canvas) @@ -1802,7 +1810,7 @@ class HTMLConverter(object): def process_file(path, options, logger): path = os.path.abspath(path) - default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0]) + default_title = force_unicode(os.path.splitext(os.path.basename(path))[0], filesystem_encoding) dirpath = os.path.dirname(path) tpath = '' diff --git a/src/calibre/ebooks/lrf/html/table.py b/src/calibre/ebooks/lrf/html/table.py index 8caa576749..3542056988 100644 --- a/src/calibre/ebooks/lrf/html/table.py +++ b/src/calibre/ebooks/lrf/html/table.py @@ -8,7 +8,7 @@ from calibre.ebooks.lrf.fonts import get_font from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \ CharButton, Plot, Paragraph, \ LrsTextTag -from polyglot.builtins import string_or_bytes, range +from 
polyglot.builtins import string_or_bytes, range, native_string_type def ceil(num): @@ -17,8 +17,8 @@ def ceil(num): def print_xml(elem): from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter - elem = elem.toElement('utf8') - ew = ElementWriter(elem, sourceEncoding='utf8') + elem = elem.toElement(native_string_type('utf8')) + ew = ElementWriter(elem, sourceEncoding=native_string_type('utf8')) ew.write(sys.stdout) print() diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py index 1350addb60..cb53ad23ce 100644 --- a/src/calibre/ebooks/lrf/lrs/convert_from.py +++ b/src/calibre/ebooks/lrf/lrs/convert_from.py @@ -221,7 +221,7 @@ class LrsParser(object): res = cls.tag_to_string(item) if res: strings.append(res) - return u''.join(strings) + return ''.join(strings) def first_pass(self): info = self.soup.find('bbebxylog').find('bookinformation').find('info') diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py index 59fc725c0c..67b0b77642 100644 --- a/src/calibre/ebooks/lrf/objects.py +++ b/src/calibre/ebooks/lrf/objects.py @@ -933,8 +933,6 @@ class Text(LRFStream): if isinstance(c, unicode_type): s += c elif c is None: - if c.name == 'P': - in_p = False p = open_containers.pop() s += p.close_html() else: diff --git a/src/calibre/ebooks/lrf/pylrs/pylrf.py b/src/calibre/ebooks/lrf/pylrs/pylrf.py index eea5010d59..bca86f29b3 100644 --- a/src/calibre/ebooks/lrf/pylrs/pylrf.py +++ b/src/calibre/ebooks/lrf/pylrs/pylrf.py @@ -645,7 +645,7 @@ class LrfWriter(object): self.tocObjId = 0 self.docInfoXml = "" self.thumbnailEncoding = "JPEG" - self.thumbnailData = "" + self.thumbnailData = b"" self.objects = [] self.objectTable = [] diff --git a/src/calibre/ebooks/lrf/pylrs/pylrs.py b/src/calibre/ebooks/lrf/pylrs/pylrs.py index 84cc3432fa..8816cbc311 100644 --- a/src/calibre/ebooks/lrf/pylrs/pylrs.py +++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py @@ -49,12 +49,12 @@ from .pylrf import (LrfWriter, LrfObject, 
LrfTag, LrfToc, STREAM_FORCE_COMPRESSED) from calibre.utils.date import isoformat -DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set +DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs from calibre import __appname__, __version__ from calibre import entity_to_unicode -from polyglot.builtins import string_or_bytes, unicode_type, iteritems +from polyglot.builtins import string_or_bytes, unicode_type, iteritems, native_string_type class LrsError(Exception): @@ -620,7 +620,7 @@ class Book(Delegator): _formatXml(root) tree = ElementTree(element=root) - tree.write(f, encoding=outputEncodingName, xml_declaration=True) + tree.write(f, encoding=native_string_type(outputEncodingName), xml_declaration=True) class BookInformation(Delegator): @@ -672,7 +672,7 @@ class Info(Delegator): # NB: generates an encoding attribute, which lrs2lrf does not tree = ElementTree(element=info) f = io.BytesIO() - tree.write(f, encoding='utf-8', xml_declaration=True) + tree.write(f, encoding=native_string_type('utf-8'), xml_declaration=True) xmlInfo = f.getvalue().decode('utf-8') xmlInfo = re.sub(r"\n", "", xmlInfo) xmlInfo = xmlInfo.replace("SumPage>", "Page>") diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index 37b9502265..158cc9108c 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -13,13 +13,13 @@ import os, sys, re from calibre import relpath, guess_type, prints, force_unicode from calibre.utils.config_base import tweaks -from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues +from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues, as_unicode from polyglot.urllib import quote, unquote, urlparse try: _author_pat = re.compile(tweaks['authors_split_regex']) -except: +except 
Exception: prints('Author split regexp:', tweaks['authors_split_regex'], 'is invalid, using default') _author_pat = re.compile(r'(?i),?\s+(and|with)\s+') @@ -270,7 +270,7 @@ class Resource(object): if self.path is None: return self._href f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment - frag = '#'+quote(f) if self.fragment else '' + frag = '#'+as_unicode(quote(f)) if self.fragment else '' if self.path == basedir: return ''+frag try: @@ -279,7 +279,7 @@ class Resource(object): rpath = self.path if isinstance(rpath, unicode_type): rpath = rpath.encode('utf-8') - return quote(rpath.replace(os.sep, '/'))+frag + return as_unicode(quote(rpath.replace(os.sep, '/')))+frag def set_basedir(self, path): self._basedir = path @@ -436,5 +436,5 @@ def rating_to_stars(value, allow_half_stars=False, star='★', half='½'): r = max(0, min(int(value or 0), 10)) ans = star * (r // 2) if allow_half_stars and r % 2: - ans += half + ans += half return ans diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py index 56414ba0a3..434edf9728 100644 --- a/src/calibre/ebooks/metadata/archive.py +++ b/src/calibre/ebooks/metadata/archive.py @@ -34,7 +34,7 @@ def archive_type(stream): ans = 'rar' try: stream.seek(pos) - except: + except Exception: pass return ans @@ -144,7 +144,7 @@ def get_comic_book_info(d, mi, series_index='volume'): dt = date(puby, 6 if pubm is None else pubm, 15) dt = parse_only_date(unicode_type(dt)) mi.pubdate = dt - except: + except Exception: pass diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py index d1f3c4ff4d..84e8039ab5 100644 --- a/src/calibre/ebooks/metadata/book/__init__.py +++ b/src/calibre/ebooks/metadata/book/__init__.py @@ -11,7 +11,7 @@ All fields must have a NULL value represented as None for simple types, an empty list/dictionary for complex types and (None, None) for cover_data ''' -SOCIAL_METADATA_FIELDS = frozenset([ 
+SOCIAL_METADATA_FIELDS = frozenset(( 'tags', # Ordered list 'rating', # A floating point number between 0 and 10 'comments', # A simple HTML enabled string @@ -20,17 +20,17 @@ SOCIAL_METADATA_FIELDS = frozenset([ # Of the form { scheme1:value1, scheme2:value2} # For example: {'isbn':'123456789', 'doi':'xxxx', ... } 'identifiers', -]) +)) ''' The list of names that convert to identifiers when in get and set. ''' -TOP_LEVEL_IDENTIFIERS = frozenset([ +TOP_LEVEL_IDENTIFIERS = frozenset(( 'isbn', -]) +)) -PUBLICATION_METADATA_FIELDS = frozenset([ +PUBLICATION_METADATA_FIELDS = frozenset(( 'title', # title must never be None. Should be _('Unknown') # Pseudo field that can be set, but if not set is auto generated # from title and languages @@ -59,28 +59,27 @@ PUBLICATION_METADATA_FIELDS = frozenset([ # image_path which is the path to an image file, encoded # in filesystem_encoding 'thumbnail', - ]) +)) -BOOK_STRUCTURE_FIELDS = frozenset([ +BOOK_STRUCTURE_FIELDS = frozenset(( # These are used by code, Null values are None. 'toc', 'spine', 'guide', 'manifest', - ]) +)) -USER_METADATA_FIELDS = frozenset([ +USER_METADATA_FIELDS = frozenset(( # A dict of dicts similar to field_metadata. Each field description dict # also contains a value field with the key #value#. 'user_metadata', -]) +)) -DEVICE_METADATA_FIELDS = frozenset([ +DEVICE_METADATA_FIELDS = frozenset(( 'device_collections', # Ordered list of strings 'lpath', # Unicode, / separated 'size', # In bytes 'mime', # Mimetype of the book file being represented +)) -]) - -CALIBRE_METADATA_FIELDS = frozenset([ +CALIBRE_METADATA_FIELDS = frozenset(( 'application_id', # An application id, currently set to the db_id. 'db_id', # the calibre primary key of the item. 
'formats', # list of formats (extensions) for this book @@ -89,9 +88,7 @@ CALIBRE_METADATA_FIELDS = frozenset([ 'user_categories', # a dict of author to an associated hyperlink 'author_link_map', - - ] -) +)) ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union( PUBLICATION_METADATA_FIELDS).union( @@ -108,13 +105,13 @@ STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union( CALIBRE_METADATA_FIELDS) # Metadata fields that smart update must do special processing to copy. -SC_FIELDS_NOT_COPIED = frozenset(['title', 'title_sort', 'authors', +SC_FIELDS_NOT_COPIED = frozenset(('title', 'title_sort', 'authors', 'author_sort', 'author_sort_map', 'cover_data', 'tags', 'languages', - 'identifiers']) + 'identifiers')) # Metadata fields that smart update should copy only if the source is not None -SC_FIELDS_COPY_NOT_NULL = frozenset(['device_collections', 'lpath', 'size', 'comments', 'thumbnail']) +SC_FIELDS_COPY_NOT_NULL = frozenset(('device_collections', 'lpath', 'size', 'comments', 'thumbnail')) # Metadata fields that smart update should copy without special handling SC_COPYABLE_FIELDS = SOCIAL_METADATA_FIELDS.union( @@ -130,6 +127,6 @@ SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union( PUBLICATION_METADATA_FIELDS).union( CALIBRE_METADATA_FIELDS).union( DEVICE_METADATA_FIELDS) - \ - frozenset(['device_collections', 'formats', - 'cover_data']) + frozenset(('device_collections', 'formats', + 'cover_data')) # these are rebuilt when needed diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index ff59edaff1..87eba2d388 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -531,11 +531,11 @@ class Metadata(object): if getattr(other, 'cover_data', False): other_cover = other.cover_data[-1] - self_cover = self.cover_data[-1] if self.cover_data else '' + self_cover = self.cover_data[-1] if self.cover_data else b'' if not self_cover: - self_cover = '' + self_cover = b'' if not other_cover: 
- other_cover = '' + other_cover = b'' if len(other_cover) > len(self_cover): self.cover_data = other.cover_data @@ -595,7 +595,7 @@ class Metadata(object): v = self.series_index if val is None else val try: x = float(v) - except (ValueError, TypeError): + except Exception: x = 1 return fmt_sidx(x) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 153ce4cee9..3773e53fec 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -93,7 +93,7 @@ class OCFReader(OCF): def __init__(self): try: - mimetype = self.open('mimetype').read().decode('utf-8').rstrip() + mimetype = self.read_bytes('mimetype').decode('utf-8').rstrip() if mimetype != OCF.MIMETYPE: print('WARNING: Invalid mimetype declaration', mimetype) except: @@ -123,9 +123,8 @@ class OCFReader(OCF): def encryption_meta(self): if self._encryption_meta_cached is None: try: - with closing(self.open(self.ENCRYPTION_PATH)) as f: - self._encryption_meta_cached = Encryption(f.read()) - except: + self._encryption_meta_cached = Encryption(self.read_bytes(self.ENCRYPTION_PATH)) + except Exception: self._encryption_meta_cached = Encryption(None) return self._encryption_meta_cached @@ -152,7 +151,7 @@ class OCFZipReader(OCFReader): self.root = getcwd() super(OCFZipReader, self).__init__() - def open(self, name, mode='r'): + def open(self, name): if isinstance(self.archive, LocalZipFile): return self.archive.open(name) return io.BytesIO(self.archive.read(name)) @@ -164,7 +163,7 @@ class OCFZipReader(OCFReader): def get_zip_reader(stream, root=None): try: zf = ZipFile(stream, mode='r') - except: + except Exception: stream.seek(0) zf = LocalZipFile(stream) return OCFZipReader(zf, root=root) @@ -176,8 +175,12 @@ class OCFDirReader(OCFReader): self.root = path super(OCFDirReader, self).__init__() - def open(self, path, *args, **kwargs): - return open(os.path.join(self.root, path), *args, **kwargs) + def open(self, path): + return 
lopen(os.path.join(self.root, path), 'rb') + + def read_bytes(self, path): + with self.open(path) as f: + return f.read() def render_cover(cpage, zf, reader=None): @@ -238,15 +241,9 @@ def get_cover(raster_cover, first_spine_item, reader): if reader.encryption_meta.is_encrypted(raster_cover): return try: - member = zf.getinfo(raster_cover) + return reader.read_bytes(raster_cover) except Exception: pass - else: - f = zf.open(member) - data = f.read() - f.close() - zf.close() - return data return render_cover(first_spine_item, zf, reader=reader) @@ -326,5 +323,5 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_ide if cpath is not None: replacements[cpath].close() os.remove(replacements[cpath].name) - except: + except Exception: pass diff --git a/src/calibre/ebooks/metadata/imp.py b/src/calibre/ebooks/metadata/imp.py index cfd0bed7b2..7dc0378475 100644 --- a/src/calibre/ebooks/metadata/imp.py +++ b/src/calibre/ebooks/metadata/imp.py @@ -9,7 +9,7 @@ import sys from calibre.ebooks.metadata import MetaInformation, string_to_authors from polyglot.builtins import unicode_type -MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG'] +MAGIC = (b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG') def get_metadata(stream): diff --git a/src/calibre/ebooks/metadata/kdl.py b/src/calibre/ebooks/metadata/kdl.py index 97249e0613..8d023f1326 100644 --- a/src/calibre/ebooks/metadata/kdl.py +++ b/src/calibre/ebooks/metadata/kdl.py @@ -79,7 +79,7 @@ def get_series(title, authors, timeout=60): raw = raw.partition('.')[0].strip() try: mi.series_index = int(raw) - except: + except Exception: pass return mi diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index ae568ce0f7..cd197f9941 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -32,7 +32,7 @@ def get_metadata(stream): try: covers.append((litfile.get_file('/data/'+item.internal), ctype)) - except: + except Exception: pass break 
covers.sort(key=lambda x: len(x[0]), reverse=True) diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index 57e520ad00..f1dcc5d7c2 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -13,18 +13,16 @@ from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata from calibre.ebooks.metadata import MetaInformation, string_to_authors from polyglot.builtins import getcwd, unicode_type -_METADATA_PRIORITIES = [ - 'html', 'htm', 'xhtml', 'xhtm', - 'rtf', 'fb2', 'pdf', 'prc', 'odt', - 'epub', 'lit', 'lrx', 'lrf', 'mobi', - 'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb' - ] - # The priorities for loading metadata from different file types # Higher values should be used to update metadata from lower values METADATA_PRIORITIES = collections.defaultdict(lambda:0) -for i, ext in enumerate(_METADATA_PRIORITIES): - METADATA_PRIORITIES[ext] = i +for i, ext in enumerate(( + 'html', 'htm', 'xhtml', 'xhtm', + 'rtf', 'fb2', 'pdf', 'prc', 'odt', + 'epub', 'lit', 'lrx', 'lrf', 'mobi', + 'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb' +)): + METADATA_PRIORITIES[ext] = i + 1 def path_to_ext(path): @@ -59,7 +57,7 @@ def _metadata_from_formats(formats, force_read_metadata=False, pattern=None): force_read_metadata=force_read_metadata, pattern=pattern) mi.smart_update(newmi) - except: + except Exception: continue if getattr(mi, 'application_id', None) is not None: return mi @@ -219,7 +217,7 @@ def opf_metadata(opfpath): data = f.read() mi.cover_data = (fmt, data) return mi - except: + except Exception: import traceback traceback.print_exc() pass diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index 28eb029fe1..c419695520 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -1,25 +1,26 @@ -''' -Retrieve and modify in-place Mobipocket book metadata. 
-''' - +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2009, Kovid Goyal from __future__ import absolute_import, division, print_function, unicode_literals -__license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \ - 'Marshall T. Vandegrift ' -__docformat__ = 'restructuredtext en' - -import os, numbers, io +import io +import numbers +import os from struct import pack, unpack from calibre.ebooks import normalize -from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN -from calibre.ebooks.mobi.utils import rescale_image +from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MobiError from calibre.ebooks.mobi.langcodes import iana2mobi +from calibre.ebooks.mobi.utils import rescale_image from calibre.utils.date import now as nowf from calibre.utils.imghdr import what from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 -from polyglot.builtins import unicode_type, range, codepoint_to_chr +from polyglot.builtins import codepoint_to_chr, range, unicode_type + + +''' +Retrieve and modify in-place Mobipocket book metadata. 
+''' def is_image(ss): @@ -142,7 +143,7 @@ class MetadataUpdater(object): ''' Fetch the DRM keys ''' drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0]) self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0]) - drm_keys = '' + drm_keys = b'' for x in range(self.drm_key_count): base_addr = drm_offset + (x * self.DRM_KEY_SIZE) drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE] @@ -234,7 +235,7 @@ class MetadataUpdater(object): mobi_header_length, = unpack('>L', self.record0[0x14:0x18]) if mobi_header_length == 0xe4: # Patch mobi_header_length to 0xE8 - self.record0[0x17] = "\xe8" + self.record0[0x17] = b"\xe8" self.record0[0xf4:0xf8] = pack('>L', 0xFFFFFFFF) mobi_header_length = 0xe8 @@ -397,7 +398,7 @@ class MetadataUpdater(object): self.original_exth_records.get(501, None) == 'EBOK' and not added_501 and not share_not_sync): from uuid import uuid4 - update_exth_record((113, unicode_type(uuid4()))) + update_exth_record((113, unicode_type(uuid4()).encode(self.codec))) # Add a 112 record with actual UUID if getattr(mi, 'uuid', None): update_exth_record((112, diff --git a/src/calibre/ebooks/metadata/odt.py b/src/calibre/ebooks/metadata/odt.py index cb0d2e23e0..d5c5060ffd 100644 --- a/src/calibre/ebooks/metadata/odt.py +++ b/src/calibre/ebooks/metadata/odt.py @@ -205,7 +205,7 @@ def get_metadata(stream, extract_cover=True): if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) - except ValueError: + except Exception: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) @@ -215,7 +215,7 @@ def get_metadata(stream, extract_cover=True): if not opfnocover: try: read_cover(stream, zin, mi, opfmeta, extract_cover) - except: + except Exception: pass # Do not let an error reading the cover prevent reading other data return mi diff --git a/src/calibre/ebooks/metadata/pml.py b/src/calibre/ebooks/metadata/pml.py index 8f579074d9..24b1108f0b 100644 --- 
a/src/calibre/ebooks/metadata/pml.py +++ b/src/calibre/ebooks/metadata/pml.py @@ -74,7 +74,7 @@ def get_cover(name, tdir, top_level=False): cover_path = os.path.join(tdir, name + '_img', 'cover.png') if os.path.exists(os.path.join(tdir, name + '_img', 'cover.png')) else os.path.join( os.path.join(tdir, 'images'), 'cover.png') if os.path.exists(os.path.join(os.path.join(tdir, 'images'), 'cover.png')) else '' if cover_path: - with open(cover_path, 'r+b') as cstream: + with open(cover_path, 'rb') as cstream: cover_data = cstream.read() return ('png', cover_data) diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 0f7a42e71c..7853c6153a 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -222,8 +222,10 @@ class GoogleBooks(Source): if not q: return None + if not isinstance(q, bytes): + q = q.encode('utf-8') return BASE_URL + urlencode({ - 'q': q.encode('utf-8'), + 'q': q, 'max-results': 20, 'start-index': 1, 'min-viewability': 'none', diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index c19f4e35a8..1e1968be2a 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -18,14 +18,8 @@ from polyglot.urllib import unquote, urlparse NCX_NS = "http://www.daisy.org/z3986/2005/ncx/" CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata" -NSMAP = { - None: NCX_NS, - 'calibre':CALIBRE_NS - } - - +NSMAP = {None: NCX_NS, 'calibre':CALIBRE_NS} E = ElementMaker(namespace=NCX_NS, nsmap=NSMAP) - C = ElementMaker(namespace=CALIBRE_NS, nsmap=NSMAP) @@ -209,7 +203,7 @@ class TOC(list): nl = nl_path(np) if nl: nl = nl[0] - text = u'' + text = '' for txt in txt_path(nl): text += etree.tostring(txt, method='text', encoding='unicode', with_tail=False) diff --git a/src/calibre/ebooks/metadata/txt.py b/src/calibre/ebooks/metadata/txt.py index 1a5c30bcb4..253d844de0 100644 --- 
a/src/calibre/ebooks/metadata/txt.py +++ b/src/calibre/ebooks/metadata/txt.py @@ -23,15 +23,15 @@ def get_metadata(stream, extract_cover=True): mi = MetaInformation(name or _('Unknown'), [_('Unknown')]) stream.seek(0) - mdata = u'' + mdata = '' for x in range(0, 4): line = stream.readline().decode('utf-8', 'replace') - if line == '': + if not line: break else: mdata += line - mdata = mdata[:100] + mdata = mdata[:1024] mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata) if mo is not None: