Fix #1145 (Soft Hyphens are not removed when placed after a "-")

This commit is contained in:
Kovid Goyal 2008-10-09 11:57:11 -07:00
parent 92ba788069
commit fec88e8d4e
2 changed files with 9 additions and 6 deletions

View File

@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
'''
Conversion to EPUB.
'''
import sys, textwrap
import sys, textwrap, re
from calibre.utils.config import Config, StringConfig
from calibre.utils.zipfile import ZipFile, ZIP_STORED
from calibre.ebooks.html import config as common_config, tostring
@@ -15,13 +15,13 @@ class DefaultProfile(object):
flow_size = sys.maxint
screen_size = None
remove_soft_hyphens = False
remove_special_chars = False
class PRS505(DefaultProfile):
flow_size = 300000
screen_size = (600, 775)
remove_soft_hyphens = True
remove_special_chars = re.compile(u'[\u200b\u00ad]')
PROFILES = {

View File

@@ -316,7 +316,10 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
class PreProcessor(object):
PREPROCESS = [(re.compile(r'&(\S+?);'), entity_to_unicode)]
PREPROCESS = [
# Convert all entities, since lxml doesn't handle them well
(re.compile(r'&(\S+?);'), entity_to_unicode),
]
# Fix pdftohtml markup
PDFTOHTML = [
@@ -365,8 +368,8 @@ class PreProcessor(object):
def preprocess(self, html):
opts = getattr(self, 'opts', False)
if opts and hasattr(opts, 'profile') and getattr(opts.profile, 'remove_soft_hyphens', False):
html = html.replace(u'\u00ad', '')
if opts and hasattr(opts, 'profile') and getattr(opts.profile, 'remove_special_chars', False):
html = opts.profile.remove_special_chars.sub('', html)
if self.is_baen(html):
rules = []
elif self.is_book_designer(html):