mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various cleanups and fixes for the last py3 merge
This commit is contained in:
parent
2c1a1813ce
commit
1b6b234c59
@ -249,14 +249,6 @@ def load_library(name, cdll):
|
||||
return cdll.LoadLibrary(name+'.so')
|
||||
|
||||
|
||||
def filename_to_utf8(name):
    '''
    Return C{name} as UTF-8 encoded bytes.

    Text input is encoded directly. Any other input is first decoded,
    using cp1252 when C{iswindows} is true and utf8 otherwise, with
    undecodable bytes replaced, and is then encoded as UTF-8.
    '''
    if not isinstance(name, unicode_type):
        source_codec = 'utf8'
        if iswindows:
            source_codec = 'cp1252'
        # 'replace' substitutes a placeholder instead of raising on bad bytes.
        name = name.decode(source_codec, 'replace')
    return name.encode('utf8')
|
||||
|
||||
|
||||
def extract(path, dir):
|
||||
extractor = None
|
||||
# First use the file header to identify its type
|
||||
|
@ -1,19 +1,46 @@
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2008, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import copy
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
from collections import deque
|
||||
from functools import partial
|
||||
from itertools import chain
|
||||
from math import ceil, floor
|
||||
|
||||
from calibre import (
|
||||
__appname__, entity_to_unicode, fit_image, force_unicode, preferred_encoding
|
||||
)
|
||||
from calibre.constants import filesystem_encoding
|
||||
from calibre.devices.interface import DevicePlugin as Device
|
||||
from calibre.ebooks import ConversionError
|
||||
from calibre.ebooks.BeautifulSoup import (
|
||||
BeautifulSoup, Comment, Declaration, NavigableString, ProcessingInstruction, Tag
|
||||
)
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.lrf import Book
|
||||
from calibre.ebooks.lrf.html.color_map import lrs_color
|
||||
from calibre.ebooks.lrf.html.table import Table
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import (
|
||||
CR, BlockSpace, BookSetting, Canvas, CharButton, DropCaps, EmpLine, Image,
|
||||
ImageBlock, ImageStream, Italic, JumpButton, LrsError, Paragraph, Plot,
|
||||
RuledLine, Span, Sub, Sup, TextBlock
|
||||
)
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
|
||||
from polyglot.urllib import unquote, urlparse
|
||||
|
||||
"""
|
||||
Code to convert HTML ebooks into LRF ebooks.
|
||||
|
||||
I am indebted to esperanc for the initial CSS->Xylog Style conversion code
|
||||
and to Falstaff for pylrs.
|
||||
"""
|
||||
import os, re, sys, copy, glob, tempfile
|
||||
from collections import deque
|
||||
from math import ceil, floor
|
||||
from functools import partial
|
||||
from polyglot.builtins import string_or_bytes, itervalues, getcwd
|
||||
from itertools import chain
|
||||
|
||||
try:
|
||||
from PIL import Image as PILImage
|
||||
@ -21,25 +48,6 @@ try:
|
||||
except ImportError:
|
||||
import Image as PILImage
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag, \
|
||||
NavigableString, Declaration, ProcessingInstruction
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
|
||||
TextBlock, ImageBlock, JumpButton, CharButton, \
|
||||
Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
|
||||
LrsError, Sup, Sub, EmpLine
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import Span
|
||||
from calibre.ebooks.lrf import Book
|
||||
from calibre.ebooks import ConversionError
|
||||
from calibre.ebooks.lrf.html.table import Table
|
||||
from calibre import filename_to_utf8, __appname__, \
|
||||
fit_image, preferred_encoding, entity_to_unicode
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.devices.interface import DevicePlugin as Device
|
||||
from calibre.ebooks.lrf.html.color_map import lrs_color
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from polyglot.builtins import unicode_type
|
||||
from polyglot.urllib import unquote, urlparse
|
||||
|
||||
|
||||
def update_css(ncss, ocss):
|
||||
for key in ncss.keys():
|
||||
@ -577,7 +585,7 @@ class HTMLConverter(object):
|
||||
css = self.tag_css(tag)[0]
|
||||
if ('display' in css and css['display'].lower() == 'none') or ('visibility' in css and css['visibility'].lower() == 'hidden'):
|
||||
return ''
|
||||
text, alt_text = u'', u''
|
||||
text, alt_text = '', ''
|
||||
for c in tag.contents:
|
||||
if limit is not None and len(text) > limit:
|
||||
break
|
||||
@ -1112,7 +1120,7 @@ class HTMLConverter(object):
|
||||
val /= 2.
|
||||
ans['sidemargin'] = int(val)
|
||||
if 2*int(ans['sidemargin']) >= factor*int(self.current_block.blockStyle.attrs['blockwidth']):
|
||||
ans['sidemargin'] = (factor*int(self.current_block.blockStyle.attrs['blockwidth'])) // 2
|
||||
ans['sidemargin'] = int((factor*int(self.current_block.blockStyle.attrs['blockwidth'])) / 2)
|
||||
|
||||
for prop in ('topskip', 'footskip', 'sidemargin'):
|
||||
if isinstance(ans[prop], string_or_bytes):
|
||||
@ -1348,7 +1356,7 @@ class HTMLConverter(object):
|
||||
''' Ensure padding and text-indent properties are respected '''
|
||||
text_properties = self.text_properties(tag_css)
|
||||
block_properties = self.block_properties(tag_css)
|
||||
indent = (float(text_properties['parindent'])//10) * (self.profile.dpi/72)
|
||||
indent = (float(text_properties['parindent'])/10) * (self.profile.dpi/72)
|
||||
margin = float(block_properties['sidemargin'])
|
||||
# Since we're flattening the block structure, we need to ensure that text
|
||||
# doesn't go off the left edge of the screen
|
||||
@ -1780,7 +1788,7 @@ class HTMLConverter(object):
|
||||
else:
|
||||
if xpos > 65535:
|
||||
xpos = 65535
|
||||
canvases[-1].put_object(block, xpos + delta//2, ypos)
|
||||
canvases[-1].put_object(block, xpos + int(delta/2), ypos)
|
||||
|
||||
for canvas in canvases:
|
||||
self.current_page.append(canvas)
|
||||
@ -1802,7 +1810,7 @@ class HTMLConverter(object):
|
||||
|
||||
def process_file(path, options, logger):
|
||||
path = os.path.abspath(path)
|
||||
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
|
||||
default_title = force_unicode(os.path.splitext(os.path.basename(path))[0], filesystem_encoding)
|
||||
dirpath = os.path.dirname(path)
|
||||
|
||||
tpath = ''
|
||||
|
@ -8,7 +8,7 @@ from calibre.ebooks.lrf.fonts import get_font
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
|
||||
CharButton, Plot, Paragraph, \
|
||||
LrsTextTag
|
||||
from polyglot.builtins import string_or_bytes, range
|
||||
from polyglot.builtins import string_or_bytes, range, native_string_type
|
||||
|
||||
|
||||
def ceil(num):
|
||||
@ -17,8 +17,8 @@ def ceil(num):
|
||||
|
||||
def print_xml(elem):
    '''
    Serialize C{elem} as XML and print it to standard output.

    C{elem} must provide a ``toElement(encoding)`` method. The value it
    returns is handed to pylrs' ``ElementWriter``, which is asked to write
    to ``sys.stdout``. An empty ``print()`` follows the output.
    '''
    # Imported inside the function, exactly as the original code does.
    from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter
    # The encoding name is wrapped in native_string_type (presumably the
    # interpreter's native str type -- confirm in polyglot.builtins).
    encoding = native_string_type('utf8')
    # Convert exactly once: the result of toElement() is what ElementWriter
    # consumes, so it must not be fed back into toElement() a second time.
    ew = ElementWriter(elem.toElement(encoding), sourceEncoding=encoding)
    ew.write(sys.stdout)
    print()
|
||||
|
||||
|
@ -221,7 +221,7 @@ class LrsParser(object):
|
||||
res = cls.tag_to_string(item)
|
||||
if res:
|
||||
strings.append(res)
|
||||
return u''.join(strings)
|
||||
return ''.join(strings)
|
||||
|
||||
def first_pass(self):
|
||||
info = self.soup.find('bbebxylog').find('bookinformation').find('info')
|
||||
|
@ -933,8 +933,6 @@ class Text(LRFStream):
|
||||
if isinstance(c, unicode_type):
|
||||
s += c
|
||||
elif c is None:
|
||||
if c.name == 'P':
|
||||
in_p = False
|
||||
p = open_containers.pop()
|
||||
s += p.close_html()
|
||||
else:
|
||||
|
@ -645,7 +645,7 @@ class LrfWriter(object):
|
||||
self.tocObjId = 0
|
||||
self.docInfoXml = ""
|
||||
self.thumbnailEncoding = "JPEG"
|
||||
self.thumbnailData = ""
|
||||
self.thumbnailData = b""
|
||||
self.objects = []
|
||||
self.objectTable = []
|
||||
|
||||
|
@ -49,12 +49,12 @@ from .pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
|
||||
STREAM_FORCE_COMPRESSED)
|
||||
from calibre.utils.date import isoformat
|
||||
|
||||
DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set
|
||||
DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set
|
||||
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
||||
|
||||
from calibre import __appname__, __version__
|
||||
from calibre import entity_to_unicode
|
||||
from polyglot.builtins import string_or_bytes, unicode_type, iteritems
|
||||
from polyglot.builtins import string_or_bytes, unicode_type, iteritems, native_string_type
|
||||
|
||||
|
||||
class LrsError(Exception):
|
||||
@ -620,7 +620,7 @@ class Book(Delegator):
|
||||
|
||||
_formatXml(root)
|
||||
tree = ElementTree(element=root)
|
||||
tree.write(f, encoding=outputEncodingName, xml_declaration=True)
|
||||
tree.write(f, encoding=native_string_type(outputEncodingName), xml_declaration=True)
|
||||
|
||||
|
||||
class BookInformation(Delegator):
|
||||
@ -672,7 +672,7 @@ class Info(Delegator):
|
||||
# NB: generates an encoding attribute, which lrs2lrf does not
|
||||
tree = ElementTree(element=info)
|
||||
f = io.BytesIO()
|
||||
tree.write(f, encoding='utf-8', xml_declaration=True)
|
||||
tree.write(f, encoding=native_string_type('utf-8'), xml_declaration=True)
|
||||
xmlInfo = f.getvalue().decode('utf-8')
|
||||
xmlInfo = re.sub(r"<CThumbnail.*?>\n", "", xmlInfo)
|
||||
xmlInfo = xmlInfo.replace("SumPage>", "Page>")
|
||||
|
@ -13,13 +13,13 @@ import os, sys, re
|
||||
|
||||
from calibre import relpath, guess_type, prints, force_unicode
|
||||
from calibre.utils.config_base import tweaks
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues
|
||||
from polyglot.builtins import codepoint_to_chr, unicode_type, range, map, zip, getcwd, iteritems, itervalues, as_unicode
|
||||
from polyglot.urllib import quote, unquote, urlparse
|
||||
|
||||
|
||||
try:
|
||||
_author_pat = re.compile(tweaks['authors_split_regex'])
|
||||
except:
|
||||
except Exception:
|
||||
prints('Author split regexp:', tweaks['authors_split_regex'],
|
||||
'is invalid, using default')
|
||||
_author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
|
||||
@ -270,7 +270,7 @@ class Resource(object):
|
||||
if self.path is None:
|
||||
return self._href
|
||||
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
|
||||
frag = '#'+quote(f) if self.fragment else ''
|
||||
frag = '#'+as_unicode(quote(f)) if self.fragment else ''
|
||||
if self.path == basedir:
|
||||
return ''+frag
|
||||
try:
|
||||
@ -279,7 +279,7 @@ class Resource(object):
|
||||
rpath = self.path
|
||||
if isinstance(rpath, unicode_type):
|
||||
rpath = rpath.encode('utf-8')
|
||||
return quote(rpath.replace(os.sep, '/'))+frag
|
||||
return as_unicode(quote(rpath.replace(os.sep, '/')))+frag
|
||||
|
||||
def set_basedir(self, path):
|
||||
self._basedir = path
|
||||
@ -436,5 +436,5 @@ def rating_to_stars(value, allow_half_stars=False, star='★', half='½'):
|
||||
r = max(0, min(int(value or 0), 10))
|
||||
ans = star * (r // 2)
|
||||
if allow_half_stars and r % 2:
|
||||
ans += half
|
||||
ans += half
|
||||
return ans
|
||||
|
@ -34,7 +34,7 @@ def archive_type(stream):
|
||||
ans = 'rar'
|
||||
try:
|
||||
stream.seek(pos)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
return ans
|
||||
|
||||
@ -144,7 +144,7 @@ def get_comic_book_info(d, mi, series_index='volume'):
|
||||
dt = date(puby, 6 if pubm is None else pubm, 15)
|
||||
dt = parse_only_date(unicode_type(dt))
|
||||
mi.pubdate = dt
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
|
@ -11,7 +11,7 @@ All fields must have a NULL value represented as None for simple types,
|
||||
an empty list/dictionary for complex types and (None, None) for cover_data
|
||||
'''
|
||||
|
||||
SOCIAL_METADATA_FIELDS = frozenset([
|
||||
SOCIAL_METADATA_FIELDS = frozenset((
|
||||
'tags', # Ordered list
|
||||
'rating', # A floating point number between 0 and 10
|
||||
'comments', # A simple HTML enabled string
|
||||
@ -20,17 +20,17 @@ SOCIAL_METADATA_FIELDS = frozenset([
|
||||
# Of the form { scheme1:value1, scheme2:value2}
|
||||
# For example: {'isbn':'123456789', 'doi':'xxxx', ... }
|
||||
'identifiers',
|
||||
])
|
||||
))
|
||||
|
||||
'''
|
||||
The list of names that convert to identifiers when in get and set.
|
||||
'''
|
||||
|
||||
TOP_LEVEL_IDENTIFIERS = frozenset([
|
||||
TOP_LEVEL_IDENTIFIERS = frozenset((
|
||||
'isbn',
|
||||
])
|
||||
))
|
||||
|
||||
PUBLICATION_METADATA_FIELDS = frozenset([
|
||||
PUBLICATION_METADATA_FIELDS = frozenset((
|
||||
'title', # title must never be None. Should be _('Unknown')
|
||||
# Pseudo field that can be set, but if not set is auto generated
|
||||
# from title and languages
|
||||
@ -59,28 +59,27 @@ PUBLICATION_METADATA_FIELDS = frozenset([
|
||||
# image_path which is the path to an image file, encoded
|
||||
# in filesystem_encoding
|
||||
'thumbnail',
|
||||
])
|
||||
))
|
||||
|
||||
BOOK_STRUCTURE_FIELDS = frozenset([
|
||||
BOOK_STRUCTURE_FIELDS = frozenset((
|
||||
# These are used by code, Null values are None.
|
||||
'toc', 'spine', 'guide', 'manifest',
|
||||
])
|
||||
))
|
||||
|
||||
USER_METADATA_FIELDS = frozenset([
|
||||
USER_METADATA_FIELDS = frozenset((
|
||||
# A dict of dicts similar to field_metadata. Each field description dict
|
||||
# also contains a value field with the key #value#.
|
||||
'user_metadata',
|
||||
])
|
||||
))
|
||||
|
||||
DEVICE_METADATA_FIELDS = frozenset([
|
||||
DEVICE_METADATA_FIELDS = frozenset((
|
||||
'device_collections', # Ordered list of strings
|
||||
'lpath', # Unicode, / separated
|
||||
'size', # In bytes
|
||||
'mime', # Mimetype of the book file being represented
|
||||
))
|
||||
|
||||
])
|
||||
|
||||
CALIBRE_METADATA_FIELDS = frozenset([
|
||||
CALIBRE_METADATA_FIELDS = frozenset((
|
||||
'application_id', # An application id, currently set to the db_id.
|
||||
'db_id', # the calibre primary key of the item.
|
||||
'formats', # list of formats (extensions) for this book
|
||||
@ -89,9 +88,7 @@ CALIBRE_METADATA_FIELDS = frozenset([
|
||||
'user_categories',
|
||||
# a dict of author to an associated hyperlink
|
||||
'author_link_map',
|
||||
|
||||
]
|
||||
)
|
||||
))
|
||||
|
||||
ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
|
||||
PUBLICATION_METADATA_FIELDS).union(
|
||||
@ -108,13 +105,13 @@ STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
|
||||
CALIBRE_METADATA_FIELDS)
|
||||
|
||||
# Metadata fields that smart update must do special processing to copy.
|
||||
SC_FIELDS_NOT_COPIED = frozenset(['title', 'title_sort', 'authors',
|
||||
SC_FIELDS_NOT_COPIED = frozenset(('title', 'title_sort', 'authors',
|
||||
'author_sort', 'author_sort_map',
|
||||
'cover_data', 'tags', 'languages',
|
||||
'identifiers'])
|
||||
'identifiers'))
|
||||
|
||||
# Metadata fields that smart update should copy only if the source is not None
|
||||
SC_FIELDS_COPY_NOT_NULL = frozenset(['device_collections', 'lpath', 'size', 'comments', 'thumbnail'])
|
||||
SC_FIELDS_COPY_NOT_NULL = frozenset(('device_collections', 'lpath', 'size', 'comments', 'thumbnail'))
|
||||
|
||||
# Metadata fields that smart update should copy without special handling
|
||||
SC_COPYABLE_FIELDS = SOCIAL_METADATA_FIELDS.union(
|
||||
@ -130,6 +127,6 @@ SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union(
|
||||
PUBLICATION_METADATA_FIELDS).union(
|
||||
CALIBRE_METADATA_FIELDS).union(
|
||||
DEVICE_METADATA_FIELDS) - \
|
||||
frozenset(['device_collections', 'formats',
|
||||
'cover_data'])
|
||||
frozenset(('device_collections', 'formats',
|
||||
'cover_data'))
|
||||
# these are rebuilt when needed
|
||||
|
@ -531,11 +531,11 @@ class Metadata(object):
|
||||
|
||||
if getattr(other, 'cover_data', False):
|
||||
other_cover = other.cover_data[-1]
|
||||
self_cover = self.cover_data[-1] if self.cover_data else ''
|
||||
self_cover = self.cover_data[-1] if self.cover_data else b''
|
||||
if not self_cover:
|
||||
self_cover = ''
|
||||
self_cover = b''
|
||||
if not other_cover:
|
||||
other_cover = ''
|
||||
other_cover = b''
|
||||
if len(other_cover) > len(self_cover):
|
||||
self.cover_data = other.cover_data
|
||||
|
||||
@ -595,7 +595,7 @@ class Metadata(object):
|
||||
v = self.series_index if val is None else val
|
||||
try:
|
||||
x = float(v)
|
||||
except (ValueError, TypeError):
|
||||
except Exception:
|
||||
x = 1
|
||||
return fmt_sidx(x)
|
||||
|
||||
|
@ -93,7 +93,7 @@ class OCFReader(OCF):
|
||||
|
||||
def __init__(self):
|
||||
try:
|
||||
mimetype = self.open('mimetype').read().decode('utf-8').rstrip()
|
||||
mimetype = self.read_bytes('mimetype').decode('utf-8').rstrip()
|
||||
if mimetype != OCF.MIMETYPE:
|
||||
print('WARNING: Invalid mimetype declaration', mimetype)
|
||||
except:
|
||||
@ -123,9 +123,8 @@ class OCFReader(OCF):
|
||||
def encryption_meta(self):
|
||||
if self._encryption_meta_cached is None:
|
||||
try:
|
||||
with closing(self.open(self.ENCRYPTION_PATH)) as f:
|
||||
self._encryption_meta_cached = Encryption(f.read())
|
||||
except:
|
||||
self._encryption_meta_cached = Encryption(self.read_bytes(self.ENCRYPTION_PATH))
|
||||
except Exception:
|
||||
self._encryption_meta_cached = Encryption(None)
|
||||
return self._encryption_meta_cached
|
||||
|
||||
@ -152,7 +151,7 @@ class OCFZipReader(OCFReader):
|
||||
self.root = getcwd()
|
||||
super(OCFZipReader, self).__init__()
|
||||
|
||||
def open(self, name, mode='r'):
|
||||
def open(self, name):
|
||||
if isinstance(self.archive, LocalZipFile):
|
||||
return self.archive.open(name)
|
||||
return io.BytesIO(self.archive.read(name))
|
||||
@ -164,7 +163,7 @@ class OCFZipReader(OCFReader):
|
||||
def get_zip_reader(stream, root=None):
    '''
    Build an OCFZipReader for the archive held in C{stream}.

    The stream is first opened with ZipFile. If that raises, the stream is
    rewound to its start and opened with LocalZipFile instead.

    :param stream: File-like object holding the archive; must support seek().
    :param root: Passed through unchanged to OCFZipReader.
    :return: An OCFZipReader wrapping whichever archive object was opened.
    '''
    try:
        zf = ZipFile(stream, mode='r')
    except Exception:
        # Exactly one handler. ``except Exception`` keeps the fallback for
        # ordinary failures while letting KeyboardInterrupt and SystemExit
        # propagate, which a bare ``except:`` would swallow.
        stream.seek(0)
        zf = LocalZipFile(stream)
    return OCFZipReader(zf, root=root)
|
||||
@ -176,8 +175,12 @@ class OCFDirReader(OCFReader):
|
||||
self.root = path
|
||||
super(OCFDirReader, self).__init__()
|
||||
|
||||
def open(self, path):
    '''
    Open C{path}, taken relative to ``self.root``, for binary reading.

    :param path: Path joined onto ``self.root`` with os.path.join.
    :return: Whatever ``lopen`` returns for that path in ``'rb'`` mode.
    '''
    # Single definition: the mode is fixed to 'rb' so callers always get
    # bytes, instead of forwarding arbitrary open() arguments.
    return lopen(os.path.join(self.root, path), 'rb')
|
||||
|
||||
def read_bytes(self, path):
    '''
    Return everything read from C{path} as opened by ``self.open``.

    The opened object is used as a context manager, so it is released when
    the ``with`` block exits, before the data is returned.
    '''
    with self.open(path) as src:
        contents = src.read()
    return contents
|
||||
|
||||
|
||||
def render_cover(cpage, zf, reader=None):
|
||||
@ -238,15 +241,9 @@ def get_cover(raster_cover, first_spine_item, reader):
|
||||
if reader.encryption_meta.is_encrypted(raster_cover):
|
||||
return
|
||||
try:
|
||||
member = zf.getinfo(raster_cover)
|
||||
return reader.read_bytes(raster_cover)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
f = zf.open(member)
|
||||
data = f.read()
|
||||
f.close()
|
||||
zf.close()
|
||||
return data
|
||||
|
||||
return render_cover(first_spine_item, zf, reader=reader)
|
||||
|
||||
@ -326,5 +323,5 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_ide
|
||||
if cpath is not None:
|
||||
replacements[cpath].close()
|
||||
os.remove(replacements[cpath].name)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
@ -9,7 +9,7 @@ import sys
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG']
|
||||
MAGIC = (b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG')
|
||||
|
||||
|
||||
def get_metadata(stream):
|
||||
|
@ -79,7 +79,7 @@ def get_series(title, authors, timeout=60):
|
||||
raw = raw.partition('.')[0].strip()
|
||||
try:
|
||||
mi.series_index = int(raw)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
return mi
|
||||
|
||||
|
@ -32,7 +32,7 @@ def get_metadata(stream):
|
||||
try:
|
||||
covers.append((litfile.get_file('/data/'+item.internal),
|
||||
ctype))
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
covers.sort(key=lambda x: len(x[0]), reverse=True)
|
||||
|
@ -13,18 +13,16 @@ from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from polyglot.builtins import getcwd, unicode_type
|
||||
|
||||
# File extensions ordered from lowest to highest metadata priority.
_METADATA_PRIORITIES = (
    'html', 'htm', 'xhtml', 'xhtm',
    'rtf', 'fb2', 'pdf', 'prc', 'odt',
    'epub', 'lit', 'lrx', 'lrf', 'mobi',
    'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb',
)

# The priorities for loading metadata from different file types
# Higher values should be used to update metadata from lower values
# Extensions that are not listed fall back to the default of 0, so the
# listed ones are numbered from 1 upwards to outrank every unknown type.
METADATA_PRIORITIES = collections.defaultdict(int)
for i, ext in enumerate(_METADATA_PRIORITIES):
    METADATA_PRIORITIES[ext] = i + 1
|
||||
|
||||
|
||||
def path_to_ext(path):
|
||||
@ -59,7 +57,7 @@ def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
|
||||
force_read_metadata=force_read_metadata,
|
||||
pattern=pattern)
|
||||
mi.smart_update(newmi)
|
||||
except:
|
||||
except Exception:
|
||||
continue
|
||||
if getattr(mi, 'application_id', None) is not None:
|
||||
return mi
|
||||
@ -219,7 +217,7 @@ def opf_metadata(opfpath):
|
||||
data = f.read()
|
||||
mi.cover_data = (fmt, data)
|
||||
return mi
|
||||
except:
|
||||
except Exception:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
pass
|
||||
|
@ -1,25 +1,26 @@
|
||||
'''
|
||||
Retrieve and modify in-place Mobipocket book metadata.
|
||||
'''
|
||||
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2009, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
|
||||
'Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, numbers, io
|
||||
import io
|
||||
import numbers
|
||||
import os
|
||||
from struct import pack, unpack
|
||||
|
||||
from calibre.ebooks import normalize
|
||||
from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
|
||||
from calibre.ebooks.mobi.utils import rescale_image
|
||||
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MobiError
|
||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
from calibre.ebooks.mobi.utils import rescale_image
|
||||
from calibre.utils.date import now as nowf
|
||||
from calibre.utils.imghdr import what
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
from polyglot.builtins import unicode_type, range, codepoint_to_chr
|
||||
from polyglot.builtins import codepoint_to_chr, range, unicode_type
|
||||
|
||||
|
||||
'''
|
||||
Retrieve and modify in-place Mobipocket book metadata.
|
||||
'''
|
||||
|
||||
|
||||
def is_image(ss):
|
||||
@ -142,7 +143,7 @@ class MetadataUpdater(object):
|
||||
''' Fetch the DRM keys '''
|
||||
drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
|
||||
self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
|
||||
drm_keys = ''
|
||||
drm_keys = b''
|
||||
for x in range(self.drm_key_count):
|
||||
base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
|
||||
drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
|
||||
@ -234,7 +235,7 @@ class MetadataUpdater(object):
|
||||
mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
|
||||
if mobi_header_length == 0xe4:
|
||||
# Patch mobi_header_length to 0xE8
|
||||
self.record0[0x17] = "\xe8"
|
||||
self.record0[0x17] = b"\xe8"
|
||||
self.record0[0xf4:0xf8] = pack('>L', 0xFFFFFFFF)
|
||||
mobi_header_length = 0xe8
|
||||
|
||||
@ -397,7 +398,7 @@ class MetadataUpdater(object):
|
||||
self.original_exth_records.get(501, None) == 'EBOK' and
|
||||
not added_501 and not share_not_sync):
|
||||
from uuid import uuid4
|
||||
update_exth_record((113, unicode_type(uuid4())))
|
||||
update_exth_record((113, unicode_type(uuid4()).encode(self.codec)))
|
||||
# Add a 112 record with actual UUID
|
||||
if getattr(mi, 'uuid', None):
|
||||
update_exth_record((112,
|
||||
|
@ -205,7 +205,7 @@ def get_metadata(stream, extract_cover=True):
|
||||
if data.get('opf.seriesindex', ''):
|
||||
try:
|
||||
mi.series_index = float(data['opf.seriesindex'])
|
||||
except ValueError:
|
||||
except Exception:
|
||||
mi.series_index = 1.0
|
||||
if data.get('opf.language', ''):
|
||||
cl = canonicalize_lang(data['opf.language'])
|
||||
@ -215,7 +215,7 @@ def get_metadata(stream, extract_cover=True):
|
||||
if not opfnocover:
|
||||
try:
|
||||
read_cover(stream, zin, mi, opfmeta, extract_cover)
|
||||
except:
|
||||
except Exception:
|
||||
pass # Do not let an error reading the cover prevent reading other data
|
||||
|
||||
return mi
|
||||
|
@ -74,7 +74,7 @@ def get_cover(name, tdir, top_level=False):
|
||||
cover_path = os.path.join(tdir, name + '_img', 'cover.png') if os.path.exists(os.path.join(tdir, name + '_img', 'cover.png')) else os.path.join(
|
||||
os.path.join(tdir, 'images'), 'cover.png') if os.path.exists(os.path.join(os.path.join(tdir, 'images'), 'cover.png')) else ''
|
||||
if cover_path:
|
||||
with open(cover_path, 'r+b') as cstream:
|
||||
with open(cover_path, 'rb') as cstream:
|
||||
cover_data = cstream.read()
|
||||
|
||||
return ('png', cover_data)
|
||||
|
@ -222,8 +222,10 @@ class GoogleBooks(Source):
|
||||
|
||||
if not q:
|
||||
return None
|
||||
if not isinstance(q, bytes):
|
||||
q = q.encode('utf-8')
|
||||
return BASE_URL + urlencode({
|
||||
'q': q.encode('utf-8'),
|
||||
'q': q,
|
||||
'max-results': 20,
|
||||
'start-index': 1,
|
||||
'min-viewability': 'none',
|
||||
|
@ -18,14 +18,8 @@ from polyglot.urllib import unquote, urlparse
|
||||
|
||||
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
|
||||
CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
|
||||
NSMAP = {
|
||||
None: NCX_NS,
|
||||
'calibre':CALIBRE_NS
|
||||
}
|
||||
|
||||
|
||||
NSMAP = {None: NCX_NS, 'calibre':CALIBRE_NS}
|
||||
E = ElementMaker(namespace=NCX_NS, nsmap=NSMAP)
|
||||
|
||||
C = ElementMaker(namespace=CALIBRE_NS, nsmap=NSMAP)
|
||||
|
||||
|
||||
@ -209,7 +203,7 @@ class TOC(list):
|
||||
nl = nl_path(np)
|
||||
if nl:
|
||||
nl = nl[0]
|
||||
text = u''
|
||||
text = ''
|
||||
for txt in txt_path(nl):
|
||||
text += etree.tostring(txt, method='text',
|
||||
encoding='unicode', with_tail=False)
|
||||
|
@ -23,15 +23,15 @@ def get_metadata(stream, extract_cover=True):
|
||||
mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
|
||||
stream.seek(0)
|
||||
|
||||
mdata = u''
|
||||
mdata = ''
|
||||
for x in range(0, 4):
|
||||
line = stream.readline().decode('utf-8', 'replace')
|
||||
if line == '':
|
||||
if not line:
|
||||
break
|
||||
else:
|
||||
mdata += line
|
||||
|
||||
mdata = mdata[:100]
|
||||
mdata = mdata[:1024]
|
||||
|
||||
mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
|
||||
if mo is not None:
|
||||
|
Loading…
x
Reference in New Issue
Block a user