From 72f137c6508792706c76346eaaa8707cba68098d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 29 Apr 2022 08:21:03 +0530 Subject: [PATCH] Ignore non_HTML spine items when extracting text --- src/calibre/db/fts/connect.py | 2 +- src/calibre/db/fts/text.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/db/fts/connect.py b/src/calibre/db/fts/connect.py index 092a1634ce..447568c2fc 100644 --- a/src/calibre/db/fts/connect.py +++ b/src/calibre/db/fts/connect.py @@ -16,7 +16,7 @@ from calibre.db.annotations import unicode_normalize from .pool import Pool from .schema_upgrade import SchemaUpgrade -# TODO: check that closing of db connection works +# TODO: check that switching libraries with indexing enabled/disabled works # TODO: db dump+restore # TODO: calibre export/import # TODO: check library and vacuuming of fts db diff --git a/src/calibre/db/fts/text.py b/src/calibre/db/fts/text.py index 4ee03b4aea..3da5698b5d 100644 --- a/src/calibre/db/fts/text.py +++ b/src/calibre/db/fts/text.py @@ -48,7 +48,8 @@ def html_to_text(root): def to_text(container, name): root = container.parsed(name) - yield from html_to_text(root) + if hasattr(root, 'xpath'): + yield from html_to_text(root) def is_fmt_ok(input_fmt):