mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Fix various problems when editing EPUB files with non-ascii filenames on OS X. Fixes #1317883 [calibre should care of 'decomposed UTF-8' filenames on Darwin platform](https://bugs.launchpad.net/calibre/+bug/1317883)
This commit is contained in:
parent
cad2a50df9
commit
78be882aff
@ -17,9 +17,7 @@ from lxml import etree
|
|||||||
from cssutils import replaceUrls, getUrls
|
from cssutils import replaceUrls, getUrls
|
||||||
|
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
from calibre.constants import isosx
|
from calibre.customize.ui import (plugin_for_input_format, plugin_for_output_format)
|
||||||
from calibre.customize.ui import (plugin_for_input_format,
|
|
||||||
plugin_for_output_format)
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.conversion.plugins.epub_input import (
|
from calibre.ebooks.conversion.plugins.epub_input import (
|
||||||
ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data)
|
ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data)
|
||||||
@ -126,17 +124,15 @@ class Container(object): # {{{
|
|||||||
for f in filenames:
|
for f in filenames:
|
||||||
path = join(dirpath, f)
|
path = join(dirpath, f)
|
||||||
name = self.abspath_to_name(path)
|
name = self.abspath_to_name(path)
|
||||||
if isosx:
|
# OS X silently changes all file names to NFD form. The EPUB
|
||||||
# OS X silently changes all file names to NFD form. The
|
# spec requires all text including filenames to be in NFC form.
|
||||||
# EPUB spec requires all text including filenames to be in
|
# The proper fix is to implement a VFS that maps between
|
||||||
# NFC form. The proper fix is to implement a VFS that maps
|
# canonical names and their file system representation, however,
|
||||||
# between canonical names and their filesystem
|
# I don't have the time for that now. Note that the container
|
||||||
# representation, however, I don't have the time for that
|
# ensures that all text files are normalized to NFC when
|
||||||
# now, so this will at least fix the problem for books that
|
# decoding them anyway, so there should be no mismatch between
|
||||||
# properly use the NFC form. Books that use the NFD form
|
# names in the text and NFC canonical file names.
|
||||||
# will be broken by this, but that's the price you pay for
|
name = unicodedata.normalize('NFC', name)
|
||||||
# using OS X.
|
|
||||||
name = unicodedata.normalize('NFC', name)
|
|
||||||
self.name_path_map[name] = path
|
self.name_path_map[name] = path
|
||||||
self.mime_map[name] = guess_type(path)
|
self.mime_map[name] = guess_type(path)
|
||||||
# Special case if we have stumbled onto the opf
|
# Special case if we have stumbled onto the opf
|
||||||
@ -373,7 +369,7 @@ class Container(object): # {{{
|
|||||||
abspath_to_name() for that.'''
|
abspath_to_name() for that.'''
|
||||||
return relpath(path, base or self.root)
|
return relpath(path, base or self.root)
|
||||||
|
|
||||||
def decode(self, data):
|
def decode(self, data, normalize_to_nfc=True):
|
||||||
"""Automatically decode :param:`data` into a `unicode` object."""
|
"""Automatically decode :param:`data` into a `unicode` object."""
|
||||||
def fix_data(d):
|
def fix_data(d):
|
||||||
return d.replace('\r\n', '\n').replace('\r', '\n')
|
return d.replace('\r\n', '\n').replace('\r', '\n')
|
||||||
@ -402,6 +398,8 @@ class Container(object): # {{{
|
|||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
data, self.used_encoding = xml_to_unicode(data)
|
data, self.used_encoding = xml_to_unicode(data)
|
||||||
|
if normalize_to_nfc:
|
||||||
|
data = unicodedata.normalize('NFC', data)
|
||||||
return fix_data(data)
|
return fix_data(data)
|
||||||
|
|
||||||
def ok_to_be_unmanifested(self, name):
|
def ok_to_be_unmanifested(self, name):
|
||||||
@ -422,6 +420,7 @@ class Container(object): # {{{
|
|||||||
def parse_xml(self, data):
|
def parse_xml(self, data):
|
||||||
data, self.used_encoding = xml_to_unicode(
|
data, self.used_encoding = xml_to_unicode(
|
||||||
data, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)
|
data, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)
|
||||||
|
data = unicodedata.normalize('NFC', data)
|
||||||
return etree.fromstring(data, parser=RECOVER_PARSER)
|
return etree.fromstring(data, parser=RECOVER_PARSER)
|
||||||
|
|
||||||
def parse_xhtml(self, data, fname='<string>', force_html5_parse=False):
|
def parse_xhtml(self, data, fname='<string>', force_html5_parse=False):
|
||||||
@ -447,11 +446,11 @@ class Container(object): # {{{
|
|||||||
data = self.parse_css(data, self.relpath(path))
|
data = self.parse_css(data, self.relpath(path))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def raw_data(self, name, decode=True):
|
def raw_data(self, name, decode=True, normalize_to_nfc=True):
|
||||||
ans = self.open(name).read()
|
ans = self.open(name).read()
|
||||||
mime = self.mime_map.get(name, guess_type(name))
|
mime = self.mime_map.get(name, guess_type(name))
|
||||||
if decode and (mime in OEB_STYLES or mime in OEB_DOCS or mime == 'text/plain' or mime[-4:] in {'+xml', '/xml'}):
|
if decode and (mime in OEB_STYLES or mime in OEB_DOCS or mime == 'text/plain' or mime[-4:] in {'+xml', '/xml'}):
|
||||||
ans = self.decode(ans)
|
ans = self.decode(ans, normalize_to_nfc=normalize_to_nfc)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def parse_css(self, data, fname='<string>', is_declaration=False):
|
def parse_css(self, data, fname='<string>', is_declaration=False):
|
||||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, os, shlex, subprocess, shutil
|
import sys, os, shlex, subprocess, shutil, unicodedata
|
||||||
|
|
||||||
from calibre import prints, as_unicode, walk
|
from calibre import prints, as_unicode, walk
|
||||||
from calibre.constants import iswindows, __appname__
|
from calibre.constants import iswindows, __appname__
|
||||||
@ -67,7 +67,7 @@ def zip_rebuilder(tdir, path):
|
|||||||
if fn in exclude_files:
|
if fn in exclude_files:
|
||||||
continue
|
continue
|
||||||
absfn = os.path.join(root, fn)
|
absfn = os.path.join(root, fn)
|
||||||
zfn = os.path.relpath(absfn, tdir).replace(os.sep, '/')
|
zfn = unicodedata.normalize('NFC', os.path.relpath(absfn, tdir).replace(os.sep, '/'))
|
||||||
zf.write(absfn, zfn)
|
zf.write(absfn, zfn)
|
||||||
|
|
||||||
def get_tools(fmt):
|
def get_tools(fmt):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user