Conversion pipeline: When converting the :first-letter CSS pseudo-element selector to a <span>, follow the W3C rules for handling leading punctuation characters. Fixes #9319 (Problem with Mobi conversion and ePub conversion)

This commit is contained in:
Kovid Goyal 2011-03-07 19:42:10 -07:00
parent b7736da887
commit bfd77656a6

View File

@ -8,11 +8,7 @@ from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import os import os, itertools, re, logging, copy, unicodedata
import itertools
import re
import logging
import copy
from weakref import WeakKeyDictionary from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError from xml.dom import SyntaxErr as CSSSyntaxError
import cssutils import cssutils
@ -234,8 +230,18 @@ class Stylizer(object):
for elem in matches: for elem in matches:
for x in elem.iter(): for x in elem.iter():
if x.text: if x.text:
span = E.span(x.text[0]) punctuation_chars = []
span.tail = x.text[1:] text = unicode(x.text)
while text:
if not unicodedata.category(text[0]).startswith('P'):
break
punctuation_chars.append(text[0])
text = text[1:]
special_text = u''.join(punctuation_chars) + \
(text[0] if text else u'')
span = E.span(special_text)
span.tail = text[1:]
x.text = None x.text = None
x.insert(0, span) x.insert(0, span)
self.style(span)._update_cssdict(cssdict) self.style(span)._update_cssdict(cssdict)