Port use of BS for comments_to_html()

2025-08-30 23:00:21 -04:00 · 2019-03-25 09:52:45 +05:30 · 2019-03-25 09:52:45 +05:30 · de9d97d688
commit de9d97d688
parent a137e676a6
2 changed files with 38 additions and 37 deletions
--- a/setup/test.py
+++ b/setup/test.py
@@ -74,6 +74,8 @@ def find_tests(which_tests=None):
        a(find_tests())
        from calibre.utils.html2text import find_tests
        a(find_tests())
        from calibre.library.comments import find_tests
        a(find_tests())
    if ok('dbcli'):
        from calibre.db.cli.tests import find_tests
        a(find_tests())
--- a/src/calibre/library/comments.py
+++ b/src/calibre/library/comments.py
@@ -1,17 +1,17 @@
 #!/usr/bin/env python2
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 # License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import re
 from calibre.constants import preferred_encoding
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, \
        CData, Comment, Declaration, ProcessingInstruction
 from calibre import prepare_string_for_xml
 from calibre.constants import preferred_encoding
 from calibre.ebooks.BeautifulSoup import (
    BeautifulSoup, CData, Comment, Declaration, NavigableString,
    ProcessingInstruction
 )
 from calibre.utils.html2text import html2text
 from polyglot.builtins import unicode_type
@@ -88,23 +88,23 @@ def comments_to_html(comments):
    # Convert two hyphens to emdash
    comments = comments.replace('--', '&mdash;')
-    soup = BeautifulSoup(comments)
+    soup = BeautifulSoup('<div>' + comments + '</div>').find('div')
-    result = BeautifulSoup()
+    result = BeautifulSoup('<div>')
    container = result.find('div')
    rtc = 0
    open_pTag = False
    all_tokens = list(soup.contents)
    for token in all_tokens:
        if isinstance(token,  (CData, Comment, Declaration, ProcessingInstruction)):
            continue
        if isinstance(token, NavigableString):
            if not open_pTag:
                pTag = result.new_tag('p')
                open_pTag = True
                ptc = 0
-            pTag.insert(ptc,prepare_string_for_xml(token))
+            pTag.insert(ptc, token)
            ptc += 1
        elif isinstance(token,  (CData, Comment, Declaration,
                ProcessingInstruction)):
            continue
        elif token.name in ['br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a',
                'hr']:
            if not open_pTag:
@@ -115,23 +115,20 @@ def comments_to_html(comments):
            ptc += 1
        else:
            if open_pTag:
-                result.insert(rtc, pTag)
+                container.insert(rtc, pTag)
                rtc += 1
                open_pTag = False
                ptc = 0
-            result.insert(rtc, token)
+            container.insert(rtc, token)
            rtc += 1
    if open_pTag:
-        result.insert(rtc, pTag)
+        container.insert(rtc, pTag)
-    for p in result.findAll('p'):
+    for p in container.findAll('p'):
        p['class'] = 'description'
-    for t in result.findAll(text=True):
+    return container.decode_contents()
        t.replaceWith(prepare_string_for_xml(unicode_type(t)))
    return result.decode_contents()
 def markdown(val):
@@ -155,21 +152,23 @@ def sanitize_comments_html(html):
    return html
-def test():
+def find_tests():
-    for pat, val in [
+    import unittest
-            ('lineone\n\nlinetwo',
+
-                '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
+    class Test(unittest.TestCase):
-            ('a <b>b&c</b>\nf', '<p class="description">a <b>b&amp;c;</b><br />f</p>'),
+
-            ('a <?xml asd> b\n\ncd', '<p class="description">a  b</p><p class="description">cd</p>'),
+        def test_comments_to_html(self):
            for pat, val in [
                    (b'lineone\n\nlinetwo',
                        '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
                    ('a <b>b&c</b>\nf',
                        '<p class="description">a <b>b&amp;c</b><br></br>f</p>'),
                    ('a <?xml asd> b\n\ncd',
                        '<p class="description">a  b</p><p class="description">cd</p>'),
            ]:
-        print()
+                cval = comments_to_html(pat)
-        print('Testing: %r'%pat)
+                self.assertEqual(cval, val)
        cval = comments_to_html(pat)
        print('Value: %r'%cval)
        if comments_to_html(pat) != val:
            print('FAILED')
            break
-
+    return unittest.defaultTestLoader.loadTestsFromTestCase(Test)
 if __name__ == '__main__':
    test()