mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Edit book: Spell check: Fix words after a comment not being checked. Fixes #1962213 [Spell checker ignores text after HTML comment](https://bugs.launchpad.net/calibre/+bug/1962213)
This commit is contained in:
parent
dd0e805838
commit
df66924f1b
@@ -163,21 +163,37 @@ opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}
|
||||
|
||||
def read_words_from_opf(root, words, file_name, book_locale):
    """Collect spell-checkable words from the metadata elements of an OPF tree.

    Every descendant element of ``root`` whose bare tag name is listed in
    ``opf_spell_tags`` contributes its own ``text`` and the ``tail`` of each
    of its direct children. Reading the children's tails is what picks up
    text that follows a child node (such as a comment) inside the element.

    ``description`` content is handed to ``add_words_from_escaped_html``;
    every other spell tag goes through ``add_words_from_text``. The
    ``_opf_file_as`` attribute is checked on every descendant element,
    whether or not its tag is in ``opf_spell_tags``.

    :param root: Root of the parsed OPF tree; must provide
        ``iterdescendants`` (presumably an lxml element -- confirm at caller).
    :param words: Accumulator passed through unchanged to the helpers.
    :param file_name: Name of the file being scanned, passed to the helpers.
    :param book_locale: Locale passed to the helpers for the found text.
    """
    for tag in root.iterdescendants('*'):
        name = barename(tag.tag)
        if name in opf_spell_tags:
            if name == 'description':
                if tag.text:
                    add_words_from_escaped_html(tag.text, words, file_name, tag, 'text', book_locale)
                # Iterating the element yields all direct children, so text
                # trailing any child node is covered via its tail.
                for child in tag:
                    if child.tail:
                        add_words_from_escaped_html(child.tail, words, file_name, child, 'tail', book_locale)
            else:
                if tag.text:
                    add_words_from_text(tag, 'text', words, file_name, book_locale)
                for child in tag:
                    if child.tail:
                        add_words_from_text(child, 'tail', words, file_name, book_locale)
        add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale)
|
||||
|
||||
|
||||
def count_chars_in_opf(root, counter, file_name, book_locale):
    """Count characters in the spell-checkable metadata of an OPF tree.

    Mirrors ``read_words_from_opf``: every descendant element of ``root``
    whose bare tag name is in ``opf_spell_tags`` contributes its own ``text``
    and the ``tail`` of each direct child. ``description`` content is handed
    to ``count_chars_in_escaped_html``; every other spell tag goes through
    ``count_chars_in_text``. The ``_opf_file_as`` attribute is counted on
    every descendant element, whether or not its tag is in
    ``opf_spell_tags``.

    :param root: Root of the parsed OPF tree; must provide
        ``iterdescendants`` (presumably an lxml element -- confirm at caller).
    :param counter: Accumulator passed through unchanged to the helpers.
    :param file_name: Name of the file being scanned, passed to the helpers.
    :param book_locale: Locale passed to the helpers for the found text.
    """
    for tag in root.iterdescendants('*'):
        name = barename(tag.tag)
        if name in opf_spell_tags:
            if name == 'description':
                if tag.text:
                    count_chars_in_escaped_html(tag.text, counter, file_name, tag, 'text', book_locale)
                for child in tag:
                    if child.tail:
                        # The tail belongs to the child, so the child (not the
                        # enclosing tag) is the node paired with 'tail', as in
                        # read_words_from_opf.
                        count_chars_in_escaped_html(child.tail, counter, file_name, child, 'tail', book_locale)
            else:
                if tag.text:
                    count_chars_in_text(tag, 'text', counter, file_name, book_locale)
                for child in tag:
                    if child.tail:
                        # Pass the child whose tail was just tested; passing
                        # the parent would refer to the parent's own tail.
                        count_chars_in_text(child, 'tail', counter, file_name, book_locale)
        count_chars_in_attr(tag, _opf_file_as, counter, file_name, book_locale)
|
||||
|
||||
|
||||
@@ -201,7 +217,7 @@ html_spell_tags = {'script', 'style', 'link'}
|
||||
|
||||
|
||||
def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
|
||||
if tag.text is not None and barename(tag.tag) not in html_spell_tags:
|
||||
if tag.text is not None and isinstance(tag.tag, str) and barename(tag.tag) not in html_spell_tags:
|
||||
add_words_from_text(tag, 'text', words, file_name, locale)
|
||||
for attr in {'alt', 'title'}:
|
||||
add_words_from_attr(tag, attr, words, file_name, locale)
|
||||
@@ -210,7 +226,7 @@ def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
|
||||
|
||||
|
||||
def count_chars_in_html_tag(tag, counter, file_name, parent_locale, locale):
|
||||
if tag.text is not None and barename(tag.tag) not in html_spell_tags:
|
||||
if tag.text is not None and isinstance(tag.tag, str) and barename(tag.tag) not in html_spell_tags:
|
||||
count_chars_in_text(tag, 'text', counter, file_name, locale)
|
||||
for attr in {'alt', 'title'}:
|
||||
count_chars_in_attr(tag, attr, counter, file_name, locale)
|
||||
@@ -219,14 +235,15 @@ def count_chars_in_html_tag(tag, counter, file_name, parent_locale, locale):
|
||||
|
||||
|
||||
def locale_from_tag(tag):
|
||||
if 'lang' in tag.attrib:
|
||||
a = tag.attrib
|
||||
if 'lang' in a:
|
||||
try:
|
||||
loc = parse_lang_code(tag.get('lang'))
|
||||
except ValueError:
|
||||
loc = None
|
||||
if loc is not None:
|
||||
return loc
|
||||
if '{http://www.w3.org/XML/1998/namespace}lang' in tag.attrib:
|
||||
if '{http://www.w3.org/XML/1998/namespace}lang' in a:
|
||||
try:
|
||||
loc = parse_lang_code(tag.get('{http://www.w3.org/XML/1998/namespace}lang'))
|
||||
except ValueError:
|
||||
@@ -241,7 +258,7 @@ def read_words_from_html(root, words, file_name, book_locale):
|
||||
parent, parent_locale = stack.pop()
|
||||
locale = locale_from_tag(parent) or parent_locale
|
||||
read_words_from_html_tag(parent, words, file_name, parent_locale, locale)
|
||||
stack.extend((tag, locale) for tag in parent.iterchildren('*'))
|
||||
stack.extend((tag, locale) for tag in parent)
|
||||
|
||||
|
||||
def count_chars_in_html(root, counter, file_name, book_locale):
|
||||
@@ -250,7 +267,7 @@ def count_chars_in_html(root, counter, file_name, book_locale):
|
||||
parent, parent_locale = stack.pop()
|
||||
locale = locale_from_tag(parent) or parent_locale
|
||||
count_chars_in_html_tag(parent, counter, file_name, parent_locale, locale)
|
||||
stack.extend((tag, locale) for tag in parent.iterchildren('*'))
|
||||
stack.extend((tag, locale) for tag in parent)
|
||||
|
||||
|
||||
def group_sort(locations):
|
||||
|
Loading…
x
Reference in New Issue
Block a user