Port use of BeautifulSoup (BS) in comments_to_html()

2025-07-09 03:04:10 -04:00 · 2019-03-25 09:52:45 +05:30 · 2019-03-25 09:52:45 +05:30 · de9d97d688
commit de9d97d688
parent a137e676a6
2 changed files with 38 additions and 37 deletions
--- a/setup/test.py
+++ b/setup/test.py
@@ -74,6 +74,8 @@ def find_tests(which_tests=None):
        a(find_tests())
        from calibre.utils.html2text import find_tests
        a(find_tests())
+        from calibre.library.comments import find_tests
+        a(find_tests())
    if ok('dbcli'):
        from calibre.db.cli.tests import find_tests
        a(find_tests())
--- a/src/calibre/library/comments.py
+++ b/src/calibre/library/comments.py
@@ -1,17 +1,17 @@
 #!/usr/bin/env python2
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>

-from __future__ import print_function
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
+from __future__ import print_function, unicode_literals

 import re

-from calibre.constants import preferred_encoding
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, \
-        CData, Comment, Declaration, ProcessingInstruction
 from calibre import prepare_string_for_xml
+from calibre.constants import preferred_encoding
+from calibre.ebooks.BeautifulSoup import (
+    BeautifulSoup, CData, Comment, Declaration, NavigableString,
+    ProcessingInstruction
+)
 from calibre.utils.html2text import html2text
 from polyglot.builtins import unicode_type

@@ -88,23 +88,23 @@ def comments_to_html(comments):
    # Convert two hyphens to emdash
    comments = comments.replace('--', '&mdash;')

-    soup = BeautifulSoup(comments)
-    result = BeautifulSoup()
+    soup = BeautifulSoup('<div>' + comments + '</div>').find('div')
+    result = BeautifulSoup('<div>')
+    container = result.find('div')
    rtc = 0
    open_pTag = False

    all_tokens = list(soup.contents)
    for token in all_tokens:
+        if isinstance(token,  (CData, Comment, Declaration, ProcessingInstruction)):
+            continue
        if isinstance(token, NavigableString):
            if not open_pTag:
                pTag = result.new_tag('p')
                open_pTag = True
                ptc = 0
-            pTag.insert(ptc,prepare_string_for_xml(token))
+            pTag.insert(ptc, token)
            ptc += 1
-        elif isinstance(token,  (CData, Comment, Declaration,
-                ProcessingInstruction)):
-            continue
        elif token.name in ['br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a',
                'hr']:
            if not open_pTag:
@@ -115,23 +115,20 @@ def comments_to_html(comments):
            ptc += 1
        else:
            if open_pTag:
-                result.insert(rtc, pTag)
+                container.insert(rtc, pTag)
                rtc += 1
                open_pTag = False
                ptc = 0
-            result.insert(rtc, token)
+            container.insert(rtc, token)
            rtc += 1

    if open_pTag:
-        result.insert(rtc, pTag)
+        container.insert(rtc, pTag)

-    for p in result.findAll('p'):
+    for p in container.findAll('p'):
        p['class'] = 'description'

-    for t in result.findAll(text=True):
-        t.replaceWith(prepare_string_for_xml(unicode_type(t)))
-
-    return result.decode_contents()
+    return container.decode_contents()


 def markdown(val):
@@ -155,21 +152,23 @@ def sanitize_comments_html(html):
    return html


-def test():
-    for pat, val in [
-            ('lineone\n\nlinetwo',
-                '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
-            ('a <b>b&c</b>\nf', '<p class="description">a <b>b&amp;c;</b><br />f</p>'),
-            ('a <?xml asd> b\n\ncd', '<p class="description">a  b</p><p class="description">cd</p>'),
+def find_tests():
+    import unittest
+
+    class Test(unittest.TestCase):
+
+        def test_comments_to_html(self):
+            for pat, val in [
+                    (b'lineone\n\nlinetwo',
+                        '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
+
+                    ('a <b>b&c</b>\nf',
+                        '<p class="description">a <b>b&amp;c</b><br></br>f</p>'),
+
+                    ('a <?xml asd> b\n\ncd',
+                        '<p class="description">a  b</p><p class="description">cd</p>'),
            ]:
-        print()
-        print('Testing: %r'%pat)
-        cval = comments_to_html(pat)
-        print('Value: %r'%cval)
-        if comments_to_html(pat) != val:
-            print('FAILED')
-            break
+                cval = comments_to_html(pat)
+                self.assertEqual(cval, val)

-
-if __name__ == '__main__':
-    test()
+    return unittest.defaultTestLoader.loadTestsFromTestCase(Test)