Ignore non-HTML spine items when extracting text

This commit is contained in:
Kovid Goyal 2022-04-29 08:21:03 +05:30
parent 69c76996dd
commit 72f137c650
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 3 additions and 2 deletions

View File

@@ -16,7 +16,7 @@ from calibre.db.annotations import unicode_normalize
from .pool import Pool from .pool import Pool
from .schema_upgrade import SchemaUpgrade from .schema_upgrade import SchemaUpgrade
# TODO: check that closing of db connection works # TODO: check that switching libraries with indexing enabled/disabled works
# TODO: db dump+restore # TODO: db dump+restore
# TODO: calibre export/import # TODO: calibre export/import
# TODO: check library and vacuuming of fts db # TODO: check library and vacuuming of fts db

View File

@@ -48,6 +48,7 @@ def html_to_text(root):
def to_text(container, name): def to_text(container, name):
root = container.parsed(name) root = container.parsed(name)
if hasattr(root, 'xpath'):
yield from html_to_text(root) yield from html_to_text(root)