diff --git a/src/calibre/library/comments.py b/src/calibre/library/comments.py index 018c39bcf7..d4ed1908f5 100644 --- a/src/calibre/library/comments.py +++ b/src/calibre/library/comments.py @@ -8,9 +8,14 @@ __docformat__ = 'restructuredtext en' import re from calibre.constants import preferred_encoding -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \ + CData, Comment, Declaration, ProcessingInstruction from calibre import prepare_string_for_xml +# Hackish - ignoring sentences ending or beginning in numbers to avoid +# confusion with decimal points. +lost_cr_pat = re.compile('([a-z])([\.\?!])([A-Z])') + def comments_to_html(comments): ''' Convert random comment text to normalized, xml-legal block of
<p>s
@@ -41,36 +46,25 @@ def comments_to_html(comments):
if '<' not in comments:
comments = prepare_string_for_xml(comments)
- comments = comments.replace(u'\n', u'
')
- return u'
%s
'%comments - - # Hackish - ignoring sentences ending or beginning in numbers to avoid - # confusion with decimal points. + parts = [u'%s
'%x.replace(u'\n', u's - if re.search('\n\n', comments): - soup = BeautifulSoup() - split_ps = comments.split(u'\n\n') - tsc = 0 - for p in split_ps: - pTag = Tag(soup,'p') - pTag.insert(0,p) - soup.insert(tsc,pTag) - tsc += 1 - comments = soup.renderContents(None) - + comments = comments.replace(u'\n\n', u'
')
# Convert solo returns to <br />
- comments = re.sub('[\r\n]','
', comments)
-
+ comments = comments.replace(u'\n', '
')
# Convert two hyphens to emdash
- comments = re.sub('--', '—', comments)
+ comments = comments.replace('--', '—')
+
soup = BeautifulSoup(comments)
result = BeautifulSoup()
rtc = 0
@@ -85,35 +79,52 @@ def comments_to_html(comments):
ptc = 0
pTag.insert(ptc,prepare_string_for_xml(token))
ptc += 1
-
- elif token.name in ['br','b','i','em']:
+ elif type(token) in (CData, Comment, Declaration,
+ ProcessingInstruction):
+ continue
+ elif token.name in ['br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a',
+ 'hr']:
if not open_pTag:
pTag = Tag(result,'p')
open_pTag = True
ptc = 0
pTag.insert(ptc, token)
ptc += 1
-
else:
if open_pTag:
result.insert(rtc, pTag)
rtc += 1
open_pTag = False
ptc = 0
- # Clean up NavigableStrings for xml
- sub_tokens = list(token.contents)
- for sub_token in sub_tokens:
- if type(sub_token) is NavigableString:
- sub_token.replaceWith(prepare_string_for_xml(sub_token))
result.insert(rtc, token)
rtc += 1
if open_pTag:
result.insert(rtc, pTag)
- paras = result.findAll('p')
- for p in paras:
+ for p in result.findAll('p'):
p['class'] = 'description'
+ for t in result.findAll(text=True):
+ t.replaceWith(prepare_string_for_xml(unicode(t)))
+
return result.renderContents(encoding=None)
+def test():
+ for pat, val in [
+ ('lineone\n\nlinetwo',
+ '
lineone
\nlinetwo
'), + ('a b&c\nf', 'a b&c;
f
a b
cd
'), + ]: + print + print 'Testing: %r'%pat + cval = comments_to_html(pat) + print 'Value: %r'%cval + if comments_to_html(pat) != val: + print 'FAILED' + break + +if __name__ == '__main__': + test() +