mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More sophisticated handling of blank spaces and added --wordspace
This commit is contained in:
parent
0ace5e730b
commit
c5d4f81ccb
@ -13,7 +13,7 @@
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
''' E-book management software'''
|
||||
__version__ = "0.3.81"
|
||||
__version__ = "0.3.82"
|
||||
__docformat__ = "epytext"
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
__appname__ = 'libprs500'
|
||||
|
@ -103,6 +103,8 @@ def option_parser(usage):
|
||||
dest='font_delta')
|
||||
laf.add_option('--disable-autorotation', action='store_true', default=False,
|
||||
help='Disable autorotation of images.', dest='disable_autorotation')
|
||||
laf.add_option('--wordspace', dest='wordspace', default=2.5, type='float',
|
||||
help='Set the space between words in pts. Default is %default')
|
||||
page = parser.add_option_group('PAGE OPTIONS')
|
||||
page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
|
||||
choices=profiles, action='callback', callback=profile_from_string,
|
||||
@ -240,7 +242,8 @@ def Book(options, font_delta=0, header=None,
|
||||
tsd = dict(fontsize=fontsize,
|
||||
parindent=int(profile.parindent),
|
||||
linespace=int(10*profile.line_space),
|
||||
baselineskip=baselineskip)
|
||||
baselineskip=baselineskip,
|
||||
wordspace=10*options.wordspace)
|
||||
if fonts['serif'] and fonts['serif'].has_key('normal'):
|
||||
tsd['fontfacename'] = fonts['serif']['normal'][1]
|
||||
|
||||
|
@ -47,7 +47,7 @@ from libprs500 import extract, filename_to_utf8
|
||||
from libprs500.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class Span(_Span):
|
||||
replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo', 'nbsp' ]
|
||||
replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo' ]
|
||||
patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
|
||||
targets = [ unichr(name2codepoint[i]) for i in replaced_entities ]
|
||||
rules = zip(patterns, targets)
|
||||
@ -229,9 +229,6 @@ class HTMLConverter(object):
|
||||
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
|
||||
# Fix <a /> elements
|
||||
MARKUP_MASSAGE = [
|
||||
# Convert &nbsp; into a normal space as the default
|
||||
# conversion converts it into \xa0 which is not a space in LRF
|
||||
(re.compile(' '), lambda match : ' '),
|
||||
# Close <a /> tags
|
||||
(re.compile("(<a\s+.*?)/>|<a/>", re.IGNORECASE),
|
||||
lambda match: match.group(1)+"></a>"),
|
||||
@ -401,7 +398,6 @@ class HTMLConverter(object):
|
||||
self.soup = BeautifulSoup(raw,
|
||||
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||
markupMassage=nmassage)
|
||||
#print self.soup
|
||||
print 'done\n\tConverting to BBeB...',
|
||||
sys.stdout.flush()
|
||||
self.verbose = verbose
|
||||
@ -763,7 +759,8 @@ class HTMLConverter(object):
|
||||
@param css:
|
||||
@type css:
|
||||
'''
|
||||
src = tag.string if hasattr(tag, 'string') else tag
|
||||
src = tag.string if hasattr(tag, 'string') else tag
|
||||
src = re.sub(r'\s{1,}', ' ', src)
|
||||
if self.lstrip_toggle:
|
||||
src = src.lstrip()
|
||||
self.lstrip_toggle = False
|
||||
@ -774,6 +771,7 @@ class HTMLConverter(object):
|
||||
try:
|
||||
self.current_para.append(Span(src, self.sanctify_css(css), self.memory,\
|
||||
self.profile.dpi, self.fonts, font_delta=self.font_delta))
|
||||
self.current_para.normalize_spaces()
|
||||
except ConversionError, err:
|
||||
if self.verbose:
|
||||
print >>sys.stderr, err
|
||||
|
@ -252,6 +252,51 @@ class LrsContainer(object):
|
||||
self.must_append = False
|
||||
|
||||
|
||||
def normalize_spaces(self, prior_text=False):
    '''
    Merge adjacent text runs, collapse whitespace and handle &nbsp;

    Works in place on C{self.contents} in two passes:

      1. Neighbouring L{Text} elements are coalesced into a single element.
      2. In every L{Text} element runs of whitespace are collapsed to one
         space, leading whitespace is removed when no non whitespace text
         precedes the element (or when it directly follows a L{CR} or
         L{DropCaps}), and non-breaking spaces (U+00A0) are replaced by
         ordinary spaces. Any other element that provides a
         C{normalize_spaces} method is normalized recursively.

    @param prior_text: True if the paragraph this container is part of
                       has non whitespace text before this container.
    @type prior_text: bool
    '''
    # Pass 1: coalesce adjacent Text elements. Each element is either folded
    # into its predecessor or kept, never both, so no text is duplicated.
    # Elements whose text is not a string are left alone instead of relying
    # on a swallowed exception.
    merged = []
    for elem in self.contents:
        prev = merged[-1] if merged else None
        if isinstance(elem, Text) and isinstance(prev, Text) \
                and isinstance(elem.text, basestring) \
                and isinstance(prev.text, basestring):
            prev.text += elem.text
        else:
            merged.append(elem)
    self.contents = merged

    # Pass 2: normalize whitespace. prior_text is tracked incrementally:
    # once some earlier element reports text it stays True, so there is no
    # need to rescan the preceding elements on every iteration.
    prev = None  # None for the first element: nothing precedes it here
    for elem in self.contents:
        if isinstance(elem, Text):
            src = elem.text
            if isinstance(src, basestring):
                src = re.sub(r'\s{1,}', ' ', src)
                if not prior_text or isinstance(prev, (CR, DropCaps)):
                    src = src.lstrip()
                # nbsp is replaced with \xa0 by BeautifulSoup. This is done
                # after collapsing, presumably so that spaces requested via
                # &nbsp; are not merged away -- NOTE(review): relies on \s
                # not matching \xa0 (no re.UNICODE flag); confirm.
                src = src.replace(u'\xa0', ' ')
                elem.text = src
        elif hasattr(elem, 'normalize_spaces'):
            elem.normalize_spaces(prior_text)

        if not prior_text and hasattr(elem, 'has_text') and elem.has_text():
            prior_text = True
        prev = elem
|
||||
|
||||
def has_text(self):
|
||||
''' Return True iff this container has non whitespace text '''
|
||||
if hasattr(self, 'text'):
|
||||
@ -1508,7 +1553,6 @@ class Paragraph(LrsContainer):
|
||||
if text is not None:
|
||||
self.append(text)
|
||||
|
||||
|
||||
def CR(self):
|
||||
# Okay, here's a single autoappender for this common operation
|
||||
cr = CR()
|
||||
|
Loading…
x
Reference in New Issue
Block a user