mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Properly escape invalid characters in TOC labels when generating LRF files
This commit is contained in:
parent
96aa2c8b0b
commit
8670321901
@ -9,6 +9,7 @@ __appname__ = 'calibre'
|
|||||||
import sys, os, logging, mechanize, locale, copy, cStringIO, re, subprocess, \
|
import sys, os, logging, mechanize, locale, copy, cStringIO, re, subprocess, \
|
||||||
textwrap, atexit, cPickle, codecs, time
|
textwrap, atexit, cPickle, codecs, time
|
||||||
from gettext import GNUTranslations
|
from gettext import GNUTranslations
|
||||||
|
from htmlentitydefs import name2codepoint
|
||||||
from math import floor
|
from math import floor
|
||||||
from optparse import OptionParser as _OptionParser
|
from optparse import OptionParser as _OptionParser
|
||||||
from optparse import IndentedHelpFormatter
|
from optparse import IndentedHelpFormatter
|
||||||
@ -570,3 +571,34 @@ except Exception, err:
|
|||||||
|
|
||||||
if islinux:
|
if islinux:
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
|
|
||||||
|
def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
|
||||||
|
'''
|
||||||
|
@param match: A match object such that '&'+match.group(1)';' is the entity.
|
||||||
|
@param exceptions: A list of entities to not convert (Each entry is the name of the entity, for e.g. 'apos' or '#1234'
|
||||||
|
@param encoding: The encoding to use to decode numeric entities between 128 and 256.
|
||||||
|
If None, the Unicode UCS encoding is used. A common encoding is cp1252.
|
||||||
|
'''
|
||||||
|
ent = match.group(1)
|
||||||
|
if ent in exceptions:
|
||||||
|
return '&'+ent+';'
|
||||||
|
if ent == 'apos':
|
||||||
|
return "'"
|
||||||
|
if ent.startswith(u'#x'):
|
||||||
|
num = int(ent[2:], 16)
|
||||||
|
if encoding is None or num > 255:
|
||||||
|
return unichr(num)
|
||||||
|
return chr(num).decode(encoding)
|
||||||
|
if ent.startswith(u'#'):
|
||||||
|
num = int(ent[1:])
|
||||||
|
if encoding is None or num > 255:
|
||||||
|
return unichr(num)
|
||||||
|
try:
|
||||||
|
return chr(num).decode(encoding)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
return unichr(num)
|
||||||
|
try:
|
||||||
|
return unichr(name2codepoint[ent])
|
||||||
|
except KeyError:
|
||||||
|
return '&'+ent+';'
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ def option_parser(usage, gui_mode=False):
|
|||||||
help=_('Render HTML tables as blocks of text instead of actual tables. This is neccessary if the HTML contains very large or complex tables.'))
|
help=_('Render HTML tables as blocks of text instead of actual tables. This is neccessary if the HTML contains very large or complex tables.'))
|
||||||
laf = parser.add_option_group('LOOK AND FEEL')
|
laf = parser.add_option_group('LOOK AND FEEL')
|
||||||
laf.add_option('--base-font-size', action='store', type='float', default=10.,
|
laf.add_option('--base-font-size', action='store', type='float', default=10.,
|
||||||
help=_('''Specify the base font size in pts. All fonts are rescaled accordingly. This option obsoletes the --font-delta option and takes precedence over it. To use --font-delta, set this to 0.'''))
|
help=_('''Specify the base font size in pts. All fonts are rescaled accordingly. This option obsoletes the --font-delta option and takes precedence over it. To use --font-delta, set this to 0. Default: %defaultpt'''))
|
||||||
laf.add_option('--enable-autorotation', action='store_true', default=False,
|
laf.add_option('--enable-autorotation', action='store_true', default=False,
|
||||||
help=_('Enable autorotation of images that are wider than the screen width.'),
|
help=_('Enable autorotation of images that are wider than the screen width.'),
|
||||||
dest='autorotation')
|
dest='autorotation')
|
||||||
@ -324,32 +324,4 @@ def Book(options, logger, font_delta=0, header=None,
|
|||||||
raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
|
raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
|
||||||
return book, fonts
|
return book, fonts
|
||||||
|
|
||||||
def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
|
from calibre import entity_to_unicode
|
||||||
'''
|
|
||||||
@param match: A match object such that '&'+match.group(1)';' is the entity.
|
|
||||||
@param exceptions: A list of entities to not convert (Each entry is the name of the entity, for e.g. 'apos' or '#1234'
|
|
||||||
@param encoding: The encoding to use to decode numeric entities between 128 and 256.
|
|
||||||
If None, the Unicode UCS encoding is used. A common encoding is cp1252.
|
|
||||||
'''
|
|
||||||
ent = match.group(1)
|
|
||||||
if ent in exceptions:
|
|
||||||
return '&'+ent+';'
|
|
||||||
if ent == 'apos':
|
|
||||||
return "'"
|
|
||||||
if ent.startswith(u'#x'):
|
|
||||||
num = int(ent[2:], 16)
|
|
||||||
if encoding is None or num > 255:
|
|
||||||
return unichr(num)
|
|
||||||
return chr(num).decode(encoding)
|
|
||||||
if ent.startswith(u'#'):
|
|
||||||
num = int(ent[1:])
|
|
||||||
if encoding is None or num > 255:
|
|
||||||
return unichr(num)
|
|
||||||
try:
|
|
||||||
return chr(num).decode(encoding)
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
return unichr(num)
|
|
||||||
try:
|
|
||||||
return unichr(name2codepoint[ent])
|
|
||||||
except KeyError:
|
|
||||||
return '&'+ent+';'
|
|
@ -37,6 +37,7 @@
|
|||||||
# EmpLine, EmpDots
|
# EmpLine, EmpDots
|
||||||
|
|
||||||
import os, re, codecs, operator
|
import os, re, codecs, operator
|
||||||
|
from xml.sax.saxutils import escape
|
||||||
from datetime import date
|
from datetime import date
|
||||||
try:
|
try:
|
||||||
from elementtree.ElementTree import (Element, SubElement)
|
from elementtree.ElementTree import (Element, SubElement)
|
||||||
@ -53,6 +54,7 @@ DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set
|
|||||||
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
||||||
|
|
||||||
from calibre import __appname__, __version__
|
from calibre import __appname__, __version__
|
||||||
|
from calibre import entity_to_unicode
|
||||||
|
|
||||||
class LrsError(Exception):
|
class LrsError(Exception):
|
||||||
pass
|
pass
|
||||||
@ -786,7 +788,7 @@ class TableOfContents(object):
|
|||||||
|
|
||||||
class TocLabel(object):
|
class TocLabel(object):
|
||||||
def __init__(self, label, textBlock):
    '''
    @param label: Text of the TOC entry. Entities in it are converted to
                  characters and the result is then XML-escaped, so the
                  stored label contains only the &amp;, &lt; and &gt; escapes.
    @param textBlock: The object this entry refers to; stored as is.
    '''
    # Non-greedy match so that adjacent entities such as '&amp;&lt;' are
    # converted one at a time instead of being swallowed as one bogus
    # entity, which would then be double-escaped by escape().
    self.label = escape(re.sub(r'&(\S+?);', entity_to_unicode, label))
    self.textBlock = textBlock
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user