From 72f137c6508792706c76346eaaa8707cba68098d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 29 Apr 2022 08:21:03 +0530
Subject: [PATCH] Ignore non-HTML spine items when extracting text

---
 src/calibre/db/fts/connect.py | 2 +-
 src/calibre/db/fts/text.py    | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/db/fts/connect.py b/src/calibre/db/fts/connect.py
index 092a1634ce..447568c2fc 100644
--- a/src/calibre/db/fts/connect.py
+++ b/src/calibre/db/fts/connect.py
@@ -16,7 +16,7 @@ from calibre.db.annotations import unicode_normalize
 from .pool import Pool
 from .schema_upgrade import SchemaUpgrade
 
-# TODO: check that closing of db connection works
+# TODO: check that switching libraries with indexing enabled/disabled works
 # TODO: db dump+restore
 # TODO: calibre export/import
 # TODO: check library and vacuuming of fts db
diff --git a/src/calibre/db/fts/text.py b/src/calibre/db/fts/text.py
index 4ee03b4aea..3da5698b5d 100644
--- a/src/calibre/db/fts/text.py
+++ b/src/calibre/db/fts/text.py
@@ -48,7 +48,8 @@ def html_to_text(root):
 
 def to_text(container, name):
     root = container.parsed(name)
-    yield from html_to_text(root)
+    if hasattr(root, 'xpath'):
+        yield from html_to_text(root)
 
 
 def is_fmt_ok(input_fmt):