mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Port use of BS for comments_html()
This commit is contained in:
parent
a137e676a6
commit
de9d97d688
@ -74,6 +74,8 @@ def find_tests(which_tests=None):
|
|||||||
a(find_tests())
|
a(find_tests())
|
||||||
from calibre.utils.html2text import find_tests
|
from calibre.utils.html2text import find_tests
|
||||||
a(find_tests())
|
a(find_tests())
|
||||||
|
from calibre.library.comments import find_tests
|
||||||
|
a(find_tests())
|
||||||
if ok('dbcli'):
|
if ok('dbcli'):
|
||||||
from calibre.db.cli.tests import find_tests
|
from calibre.db.cli.tests import find_tests
|
||||||
a(find_tests())
|
a(find_tests())
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function, unicode_literals
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, \
|
|
||||||
CData, Comment, Declaration, ProcessingInstruction
|
|
||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
|
from calibre.constants import preferred_encoding
|
||||||
|
from calibre.ebooks.BeautifulSoup import (
|
||||||
|
BeautifulSoup, CData, Comment, Declaration, NavigableString,
|
||||||
|
ProcessingInstruction
|
||||||
|
)
|
||||||
from calibre.utils.html2text import html2text
|
from calibre.utils.html2text import html2text
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
@ -88,23 +88,23 @@ def comments_to_html(comments):
|
|||||||
# Convert two hyphens to emdash
|
# Convert two hyphens to emdash
|
||||||
comments = comments.replace('--', '—')
|
comments = comments.replace('--', '—')
|
||||||
|
|
||||||
soup = BeautifulSoup(comments)
|
soup = BeautifulSoup('<div>' + comments + '</div>').find('div')
|
||||||
result = BeautifulSoup()
|
result = BeautifulSoup('<div>')
|
||||||
|
container = result.find('div')
|
||||||
rtc = 0
|
rtc = 0
|
||||||
open_pTag = False
|
open_pTag = False
|
||||||
|
|
||||||
all_tokens = list(soup.contents)
|
all_tokens = list(soup.contents)
|
||||||
for token in all_tokens:
|
for token in all_tokens:
|
||||||
|
if isinstance(token, (CData, Comment, Declaration, ProcessingInstruction)):
|
||||||
|
continue
|
||||||
if isinstance(token, NavigableString):
|
if isinstance(token, NavigableString):
|
||||||
if not open_pTag:
|
if not open_pTag:
|
||||||
pTag = result.new_tag('p')
|
pTag = result.new_tag('p')
|
||||||
open_pTag = True
|
open_pTag = True
|
||||||
ptc = 0
|
ptc = 0
|
||||||
pTag.insert(ptc,prepare_string_for_xml(token))
|
pTag.insert(ptc, token)
|
||||||
ptc += 1
|
ptc += 1
|
||||||
elif isinstance(token, (CData, Comment, Declaration,
|
|
||||||
ProcessingInstruction)):
|
|
||||||
continue
|
|
||||||
elif token.name in ['br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a',
|
elif token.name in ['br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a',
|
||||||
'hr']:
|
'hr']:
|
||||||
if not open_pTag:
|
if not open_pTag:
|
||||||
@ -115,23 +115,20 @@ def comments_to_html(comments):
|
|||||||
ptc += 1
|
ptc += 1
|
||||||
else:
|
else:
|
||||||
if open_pTag:
|
if open_pTag:
|
||||||
result.insert(rtc, pTag)
|
container.insert(rtc, pTag)
|
||||||
rtc += 1
|
rtc += 1
|
||||||
open_pTag = False
|
open_pTag = False
|
||||||
ptc = 0
|
ptc = 0
|
||||||
result.insert(rtc, token)
|
container.insert(rtc, token)
|
||||||
rtc += 1
|
rtc += 1
|
||||||
|
|
||||||
if open_pTag:
|
if open_pTag:
|
||||||
result.insert(rtc, pTag)
|
container.insert(rtc, pTag)
|
||||||
|
|
||||||
for p in result.findAll('p'):
|
for p in container.findAll('p'):
|
||||||
p['class'] = 'description'
|
p['class'] = 'description'
|
||||||
|
|
||||||
for t in result.findAll(text=True):
|
return container.decode_contents()
|
||||||
t.replaceWith(prepare_string_for_xml(unicode_type(t)))
|
|
||||||
|
|
||||||
return result.decode_contents()
|
|
||||||
|
|
||||||
|
|
||||||
def markdown(val):
|
def markdown(val):
|
||||||
@ -155,21 +152,23 @@ def sanitize_comments_html(html):
|
|||||||
return html
|
return html
|
||||||
|
|
||||||
|
|
||||||
def test():
|
def find_tests():
|
||||||
for pat, val in [
|
import unittest
|
||||||
('lineone\n\nlinetwo',
|
|
||||||
'<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
|
class Test(unittest.TestCase):
|
||||||
('a <b>b&c</b>\nf', '<p class="description">a <b>b&c;</b><br />f</p>'),
|
|
||||||
('a <?xml asd> b\n\ncd', '<p class="description">a b</p><p class="description">cd</p>'),
|
def test_comments_to_html(self):
|
||||||
|
for pat, val in [
|
||||||
|
(b'lineone\n\nlinetwo',
|
||||||
|
'<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
|
||||||
|
|
||||||
|
('a <b>b&c</b>\nf',
|
||||||
|
'<p class="description">a <b>b&c</b><br></br>f</p>'),
|
||||||
|
|
||||||
|
('a <?xml asd> b\n\ncd',
|
||||||
|
'<p class="description">a b</p><p class="description">cd</p>'),
|
||||||
]:
|
]:
|
||||||
print()
|
cval = comments_to_html(pat)
|
||||||
print('Testing: %r'%pat)
|
self.assertEqual(cval, val)
|
||||||
cval = comments_to_html(pat)
|
|
||||||
print('Value: %r'%cval)
|
|
||||||
if comments_to_html(pat) != val:
|
|
||||||
print('FAILED')
|
|
||||||
break
|
|
||||||
|
|
||||||
|
return unittest.defaultTestLoader.loadTestsFromTestCase(Test)
|
||||||
if __name__ == '__main__':
|
|
||||||
test()
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user