diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py index bf4e54e657..56fb0903bb 100644 --- a/src/libprs500/__init__.py +++ b/src/libprs500/__init__.py @@ -13,7 +13,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ''' E-book management software''' -__version__ = "0.3.81" +__version__ = "0.3.82" __docformat__ = "epytext" __author__ = "Kovid Goyal " __appname__ = 'libprs500' diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index 247db16bf5..0400a43a07 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -103,6 +103,8 @@ def option_parser(usage): dest='font_delta') laf.add_option('--disable-autorotation', action='store_true', default=False, help='Disable autorotation of images.', dest='disable_autorotation') + laf.add_option('--wordspace', dest='wordspace', default=2.5, type='float', + help='Set the space between words in pts. 
Default is %default') page = parser.add_option_group('PAGE OPTIONS') page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice', choices=profiles, action='callback', callback=profile_from_string, @@ -240,7 +242,8 @@ def Book(options, font_delta=0, header=None, tsd = dict(fontsize=fontsize, parindent=int(profile.parindent), linespace=int(10*profile.line_space), - baselineskip=baselineskip) + baselineskip=baselineskip, + wordspace=10*options.wordspace) if fonts['serif'] and fonts['serif'].has_key('normal'): tsd['fontfacename'] = fonts['serif']['normal'][1] diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 981cbc71bd..2bf8553c95 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -47,7 +47,7 @@ from libprs500 import extract, filename_to_utf8 from libprs500.ptempfile import PersistentTemporaryFile class Span(_Span): - replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo', 'nbsp' ] + replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo' ] patterns = [ re.compile('&'+i+';') for i in replaced_entities ] targets = [ unichr(name2codepoint[i]) for i in replaced_entities ] rules = zip(patterns, targets) @@ -229,9 +229,6 @@ class HTMLConverter(object): IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) # Fix elements MARKUP_MASSAGE = [ - # Convert   into a normal space as the default - # conversion converts it into \xa0 which is not a space in LRF - (re.compile(' '), lambda match : ' '), # Close tags (re.compile("(|", re.IGNORECASE), lambda match: match.group(1)+">"), @@ -401,7 +398,6 @@ class HTMLConverter(object): self.soup = BeautifulSoup(raw, convertEntities=BeautifulSoup.HTML_ENTITIES, markupMassage=nmassage) - #print self.soup print 'done\n\tConverting to BBeB...', sys.stdout.flush() self.verbose = verbose @@ -763,7 +759,8 @@ class HTMLConverter(object): @param 
def normalize_spaces(self, prior_text=False):
    '''
    Collapse whitespace in the text held by this container and its children.

    Adjacent L{Text} elements are merged into a single element, every run
    of whitespace is reduced to one space, leading whitespace is dropped
    from text that starts a line, and non-breaking spaces are converted to
    ordinary spaces. Child containers are normalized recursively.

    @param prior_text: True if the paragraph this container is part of
                       has non whitespace text before this container.
    '''
    # Merge every run of consecutive Text elements into the first element
    # of the run. The absorbed elements are dropped, so their text appears
    # exactly once in the result.
    merged = []
    for elem in self.contents:
        last = merged[-1] if merged else None
        if isinstance(elem, Text) and isinstance(last, Text) \
                and isinstance(elem.text, basestring) \
                and isinstance(last.text, basestring):
            last.text += elem.text
        else:
            merged.append(elem)
    self.contents = merged

    for i, elem in enumerate(self.contents):
        previous = self.contents[i - 1] if i > 0 else None

        # Once some earlier sibling is known to hold text the flag stays
        # set, so only the immediately preceding sibling has to be
        # examined on each step.
        if i > 0 and not prior_text:
            prior_text = bool(hasattr(previous, 'has_text')
                              and previous.has_text())

        if isinstance(elem, Text):
            src = elem.text
            if isinstance(src, basestring):
                src = re.sub(r'\s+', ' ', src)
                # Only a real predecessor can start a new line; the first
                # element must not wrap around to the last one.
                starts_line = i > 0 and isinstance(previous, (CR, DropCaps))
                if starts_line or not prior_text:
                    # NOTE(review): on unicode strings lstrip() also removes
                    # a leading u'\xa0' -- confirm that is intended.
                    src = src.lstrip()
                # nbsp is replaced with \xa0 by BeautifulSoup
                src = src.replace(u'\xa0', ' ')
                elem.text = src
        elif hasattr(elem, 'normalize_spaces'):
            elem.normalize_spaces(prior_text)