From de9d97d6885859f9c64584a75009ceb185606a27 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Mar 2019 09:52:45 +0530 Subject: [PATCH] Port use of BS for comments_html() --- setup/test.py | 2 + src/calibre/library/comments.py | 73 ++++++++++++++++----------------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/setup/test.py b/setup/test.py index a640683191..6a85007898 100644 --- a/setup/test.py +++ b/setup/test.py @@ -74,6 +74,8 @@ def find_tests(which_tests=None): a(find_tests()) from calibre.utils.html2text import find_tests a(find_tests()) + from calibre.library.comments import find_tests + a(find_tests()) if ok('dbcli'): from calibre.db.cli.tests import find_tests a(find_tests()) diff --git a/src/calibre/library/comments.py b/src/calibre/library/comments.py index 5a176cb53c..510bd344ab 100644 --- a/src/calibre/library/comments.py +++ b/src/calibre/library/comments.py @@ -1,17 +1,17 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# License: GPLv3 Copyright: 2010, Kovid Goyal -from __future__ import print_function -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' +from __future__ import print_function, unicode_literals import re -from calibre.constants import preferred_encoding -from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, \ - CData, Comment, Declaration, ProcessingInstruction from calibre import prepare_string_for_xml +from calibre.constants import preferred_encoding +from calibre.ebooks.BeautifulSoup import ( + BeautifulSoup, CData, Comment, Declaration, NavigableString, + ProcessingInstruction +) from calibre.utils.html2text import html2text from polyglot.builtins import unicode_type @@ -88,23 +88,23 @@ def comments_to_html(comments): # Convert two hyphens to emdash comments = comments.replace('--', '—') - soup = BeautifulSoup(comments) - result = BeautifulSoup() + soup = BeautifulSoup('
' + comments + '
').find('div') + result = BeautifulSoup('
') + container = result.find('div') rtc = 0 open_pTag = False all_tokens = list(soup.contents) for token in all_tokens: + if isinstance(token, (CData, Comment, Declaration, ProcessingInstruction)): + continue if isinstance(token, NavigableString): if not open_pTag: pTag = result.new_tag('p') open_pTag = True ptc = 0 - pTag.insert(ptc,prepare_string_for_xml(token)) + pTag.insert(ptc, token) ptc += 1 - elif isinstance(token, (CData, Comment, Declaration, - ProcessingInstruction)): - continue elif token.name in ['br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr']: if not open_pTag: @@ -115,23 +115,20 @@ def comments_to_html(comments): ptc += 1 else: if open_pTag: - result.insert(rtc, pTag) + container.insert(rtc, pTag) rtc += 1 open_pTag = False ptc = 0 - result.insert(rtc, token) + container.insert(rtc, token) rtc += 1 if open_pTag: - result.insert(rtc, pTag) + container.insert(rtc, pTag) - for p in result.findAll('p'): + for p in container.findAll('p'): p['class'] = 'description' - for t in result.findAll(text=True): - t.replaceWith(prepare_string_for_xml(unicode_type(t))) - - return result.decode_contents() + return container.decode_contents() def markdown(val): @@ -155,21 +152,23 @@ def sanitize_comments_html(html): return html -def test(): - for pat, val in [ - ('lineone\n\nlinetwo', - '

lineone

\n

linetwo

'), - ('a b&c\nf', '

a b&c;
f

'), - ('a b\n\ncd', '

a b

cd

'), +def find_tests(): + import unittest + + class Test(unittest.TestCase): + + def test_comments_to_html(self): + for pat, val in [ + (b'lineone\n\nlinetwo', + '

lineone

\n

linetwo

'), + + ('a b&c\nf', + '

a b&c

f

'), + + ('a b\n\ncd', + '

a b

cd

'), ]: - print() - print('Testing: %r'%pat) - cval = comments_to_html(pat) - print('Value: %r'%cval) - if comments_to_html(pat) != val: - print('FAILED') - break + cval = comments_to_html(pat) + self.assertEqual(cval, val) - -if __name__ == '__main__': - test() + return unittest.defaultTestLoader.loadTestsFromTestCase(Test)