diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 2b924dbf2f..b0f8c67cc9 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -9,6 +9,7 @@ __appname__ = 'calibre'
 import sys, os, logging, mechanize, locale, copy, cStringIO, re, subprocess, \
        textwrap, atexit, cPickle, codecs, time
 from gettext import GNUTranslations
+from htmlentitydefs import name2codepoint
 from math import floor
 from optparse import OptionParser as _OptionParser
 from optparse import IndentedHelpFormatter
@@ -569,4 +570,44 @@ except Exception, err:
     pictureflowerror = str(err)

 if islinux:
-    os.chdir(cwd)
+    os.chdir(cwd)
+
+def entity_to_unicode(match, exceptions=(), encoding='cp1252'):
+    '''
+    Convert a single HTML/XML entity to the unicode character it denotes.
+    Intended for use as the replacement callback of re.sub().
+
+    @param match: A match object such that '&'+match.group(1)+';' is the entity.
+    @param exceptions: A sequence of entities to not convert (Each entry is the
+    name of the entity, e.g. 'apos' or '#1234').
+    @param encoding: The encoding to use to decode numeric entities between 128 and 255.
+    If None, the Unicode UCS encoding is used. A common encoding is cp1252.
+    @return: The decoded character. Entities that are listed in exceptions, are
+    unknown or are malformed are returned unchanged.
+    '''
+    ent = match.group(1)
+    if ent in exceptions:
+        return '&'+ent+';'
+    if ent == 'apos':
+        return "'"
+    if ent.startswith('#'):
+        try:
+            if ent[1:2] in ('x', 'X'):
+                num = int(ent[2:], 16)
+            else:
+                num = int(ent[1:])
+            if encoding is not None and num < 256:
+                try:
+                    return chr(num).decode(encoding)
+                except UnicodeDecodeError:
+                    # Byte is undefined in encoding (e.g. 0x81 in cp1252), use the code point
+                    pass
+            return unichr(num)
+        except (ValueError, OverflowError):
+            # Not a number or not a valid code point, leave the entity untouched
+            return '&'+ent+';'
+    try:
+        return unichr(name2codepoint[ent])
+    except KeyError:
+        return '&'+ent+';'
+
diff --git a/src/calibre/ebooks/lrf/__init__.py b/src/calibre/ebooks/lrf/__init__.py
index 4498ee6400..990ce6b08d 100644
--- a/src/calibre/ebooks/lrf/__init__.py
+++ b/src/calibre/ebooks/lrf/__init__.py
@@ -110,7 +110,7 @@ def option_parser(usage, gui_mode=False):
                       help=_('Render HTML tables as blocks of text instead of actual tables. This is neccessary if the HTML contains very large or complex tables.'))
     laf = parser.add_option_group('LOOK AND FEEL')
     laf.add_option('--base-font-size', action='store', type='float', default=10.,
-                   help=_('''Specify the base font size in pts. All fonts are rescaled accordingly. This option obsoletes the --font-delta option and takes precedence over it. To use --font-delta, set this to 0.'''))
+                   help=_('''Specify the base font size in pts. All fonts are rescaled accordingly. This option obsoletes the --font-delta option and takes precedence over it. To use --font-delta, set this to 0. Default: %defaultpt'''))
     laf.add_option('--enable-autorotation', action='store_true', default=False,
                    help=_('Enable autorotation of images that are wider than the screen width.'),
                    dest='autorotation')
@@ -324,32 +324,4 @@ def Book(options, logger, font_delta=0, header=None,
         raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
     return book, fonts

-def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
-    '''
-    @param match: A match object such that '&'+match.group(1)';' is the entity.
-    @param exceptions: A list of entities to not convert (Each entry is the name of the entity, for e.g. 'apos' or '#1234'
-    @param encoding: The encoding to use to decode numeric entities between 128 and 256.
-    If None, the Unicode UCS encoding is used. A common encoding is cp1252.
-    '''
-    ent = match.group(1)
-    if ent in exceptions:
-        return '&'+ent+';'
-    if ent == 'apos':
-        return "'"
-    if ent.startswith(u'#x'):
-        num = int(ent[2:], 16)
-        if encoding is None or num > 255:
-            return unichr(num)
-        return chr(num).decode(encoding)
-    if ent.startswith(u'#'):
-        num = int(ent[1:])
-        if encoding is None or num > 255:
-            return unichr(num)
-        try:
-            return chr(num).decode(encoding)
-        except UnicodeDecodeError:
-            return unichr(num)
-    try:
-        return unichr(name2codepoint[ent])
-    except KeyError:
-        return '&'+ent+';'
+from calibre import entity_to_unicode
\ No newline at end of file
diff --git a/src/calibre/ebooks/lrf/pylrs/pylrs.py b/src/calibre/ebooks/lrf/pylrs/pylrs.py
index dae25b5284..55069b9934 100644
--- a/src/calibre/ebooks/lrf/pylrs/pylrs.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py
@@ -37,6 +37,7 @@
 # EmpLine, EmpDots

 import os, re, codecs, operator
+from xml.sax.saxutils import escape
 from datetime import date
 try:
     from elementtree.ElementTree import (Element, SubElement)
@@ -53,6 +54,7 @@ DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set
 DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs

 from calibre import __appname__, __version__
+from calibre import entity_to_unicode

 class LrsError(Exception):
     pass
@@ -786,7 +788,7 @@ class TableOfContents(object):

 class TocLabel(object):
     def __init__(self, label, textBlock):
-        self.label = label
+        self.label = escape(re.sub(r'&(\S+?);', entity_to_unicode, label))
         self.textBlock = textBlock

