Ignore non-HTML spine items when extracting text

2025-07-09 03:04:10 -04:00 · 2022-04-29 08:21:03 +05:30 · 2022-04-29 08:21:03 +05:30 · 72f137c650
commit 72f137c650
parent 69c76996dd
2 changed files with 3 additions and 2 deletions
--- a/src/calibre/db/fts/connect.py
+++ b/src/calibre/db/fts/connect.py
@@ -16,7 +16,7 @@ from calibre.db.annotations import unicode_normalize
 from .pool import Pool
 from .schema_upgrade import SchemaUpgrade
-# TODO: check that closing of db connection works
+# TODO: check that switching libraries with indexing enabled/disabled works
 # TODO: db dump+restore
 # TODO: calibre export/import
 # TODO: check library and vacuuming of fts db
--- a/src/calibre/db/fts/text.py
+++ b/src/calibre/db/fts/text.py
@@ -48,6 +48,7 @@ def html_to_text(root):
 def to_text(container, name):
    root = container.parsed(name)
    if hasattr(root, 'xpath'):
        yield from html_to_text(root)