mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Edit Book: Fix escaped HTML tags in the comments metadata being reported as mis-spelled words. Fixes #1441925 [Spell check flags html tags in metadata](https://bugs.launchpad.net/calibre/+bug/1441925)
This commit is contained in:
parent
203cb98ac0
commit
eec1f44dee
@ -9,10 +9,12 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
from calibre import replace_entities
|
||||
from calibre.spell.break_iterator import split_into_words, index_of
|
||||
from calibre.spell.dictionary import parse_lang_code
|
||||
from calibre.ebooks.oeb.base import barename
|
||||
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
|
||||
from calibre.ebooks.oeb.polish.parsing import parse
|
||||
from calibre.ebooks.oeb.polish.toc import find_existing_toc
|
||||
|
||||
_patterns = None
|
||||
@ -96,6 +98,18 @@ def add_words_from_attr(node, attr, words, file_name, locale):
|
||||
def add_words_from_text(node, attr, words, file_name, locale):
|
||||
add_words(getattr(node, attr), node, words, file_name, locale, (False, attr))
|
||||
|
||||
def add_words_from_escaped_html(text, words, file_name, node, attr, locale):
|
||||
text = replace_entities(text)
|
||||
root = parse('<html><body><div>%s</div></body></html>' % text, decoder=lambda x:x.decode('utf-8'))
|
||||
ewords = defaultdict(list)
|
||||
ewords[None] = 0
|
||||
read_words_from_html(root, ewords, file_name, locale)
|
||||
words[None] += ewords.pop(None)
|
||||
for k, locs in ewords.iteritems():
|
||||
for loc in locs:
|
||||
loc.location_node, loc.node_item = node, (False, attr)
|
||||
words[k].extend(locs)
|
||||
|
||||
_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
|
||||
|
||||
opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}
|
||||
@ -106,7 +120,10 @@ opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}
|
||||
def read_words_from_opf(root, words, file_name, book_locale):
|
||||
for tag in root.iterdescendants('*'):
|
||||
if tag.text is not None and barename(tag.tag) in opf_spell_tags:
|
||||
add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||
if barename(tag.tag) == 'description':
|
||||
add_words_from_escaped_html(tag.text, words, file_name, tag, 'text', book_locale)
|
||||
else:
|
||||
add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||
add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale)
|
||||
|
||||
ncx_spell_tags = {'text'}
|
||||
|
Loading…
x
Reference in New Issue
Block a user