mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Fix escaped HTML tags in the comments metadata being reported as mis-spelled words. Fixes #1441925 [Spell check flags html tags in metadata](https://bugs.launchpad.net/calibre/+bug/1441925)
This commit is contained in:
parent
203cb98ac0
commit
eec1f44dee
@ -9,10 +9,12 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import sys
|
import sys
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from calibre import replace_entities
|
||||||
from calibre.spell.break_iterator import split_into_words, index_of
|
from calibre.spell.break_iterator import split_into_words, index_of
|
||||||
from calibre.spell.dictionary import parse_lang_code
|
from calibre.spell.dictionary import parse_lang_code
|
||||||
from calibre.ebooks.oeb.base import barename
|
from calibre.ebooks.oeb.base import barename
|
||||||
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
|
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
|
||||||
|
from calibre.ebooks.oeb.polish.parsing import parse
|
||||||
from calibre.ebooks.oeb.polish.toc import find_existing_toc
|
from calibre.ebooks.oeb.polish.toc import find_existing_toc
|
||||||
|
|
||||||
_patterns = None
|
_patterns = None
|
||||||
@ -96,6 +98,18 @@ def add_words_from_attr(node, attr, words, file_name, locale):
|
|||||||
def add_words_from_text(node, attr, words, file_name, locale):
    """Feed the string stored in attribute ``attr`` of ``node`` to add_words().

    The value is looked up with ``getattr(node, attr)`` and handed to
    ``add_words`` together with ``node``, ``words``, ``file_name`` and
    ``locale``. The final argument is the tuple ``(False, attr)``.
    """
    # NOTE(review): the leading False presumably marks "text on the node
    # object" as opposed to an XML attribute -- confirm against add_words().
    text = getattr(node, attr)
    node_item = (False, attr)
    add_words(text, node, words, file_name, locale, node_item)
|
||||||
|
|
||||||
|
def add_words_from_escaped_html(text, words, file_name, node, attr, locale):
    """Collect words from ``text``, which holds entity-escaped HTML markup.

    Entities in ``text`` are resolved, the result is wrapped in a minimal
    HTML document and parsed, and the parsed tree is passed to
    ``read_words_from_html``. This is used for metadata whose value is
    escaped HTML, so that the markup itself is not treated as words.

    :param text: The escaped HTML string to scan.
    :param words: Mapping updated in place. ``words[None]`` is a running
        number that is incremented; every other key maps to a list of
        location objects that is extended.
    :param file_name: Passed through to ``read_words_from_html``.
    :param node: Object the text was taken from; stored on every location
        found as ``location_node``.
    :param attr: Name of the attribute of ``node`` that held the text;
        stored on every location found as ``node_item = (False, attr)``.
    :param locale: Passed through to ``read_words_from_html``.
    """
    text = replace_entities(text)
    root = parse('<html><body><div>%s</div></body></html>' % text, decoder=lambda x:x.decode('utf-8'))
    # Collect into a scratch mapping first: the locations produced refer to
    # the temporary tree built above and must be redirected before they are
    # merged into the caller's mapping.
    ewords = defaultdict(list)
    ewords[None] = 0
    read_words_from_html(root, ewords, file_name, locale)
    words[None] += ewords.pop(None)
    # items() instead of iteritems(): the latter only exists on Python 2,
    # while items() yields the same pairs on both Python 2 and Python 3.
    for k, locs in ewords.items():
        for loc in locs:
            # Point the location at the real node/attribute the escaped
            # text came from rather than at the throw-away parsed tree.
            loc.location_node, loc.node_item = node, (False, attr)
        words[k].extend(locs)
|
||||||
|
|
||||||
_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
|
_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
|
||||||
|
|
||||||
opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}
|
opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}
|
||||||
@ -106,7 +120,10 @@ opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}
|
|||||||
def read_words_from_opf(root, words, file_name, book_locale):
    """Collect words from the descendants of an OPF ``root`` element.

    For every descendant element whose text is not None and whose bare tag
    name is listed in ``opf_spell_tags``, the element text is scanned:
    ``description`` elements go through ``add_words_from_escaped_html``,
    all others through ``add_words_from_text``. Independently of that, the
    ``file-as`` attribute (``_opf_file_as``) of every descendant is passed
    to ``add_words_from_attr``.
    """
    for element in root.iterdescendants('*'):
        if element.text is not None:
            tag_name = barename(element.tag)
            if tag_name in opf_spell_tags:
                if tag_name == 'description':
                    # Text is handled as escaped HTML rather than plain text
                    add_words_from_escaped_html(
                        element.text, words, file_name, element, 'text', book_locale)
                else:
                    add_words_from_text(element, 'text', words, file_name, book_locale)
        # Runs for every descendant, whether or not its text was scanned
        add_words_from_attr(element, _opf_file_as, words, file_name, book_locale)
|
||||||
|
|
||||||
ncx_spell_tags = {'text'}
|
ncx_spell_tags = {'text'}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user