mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Ignore non-HTML spine items when extracting text
This commit is contained in:
parent
69c76996dd
commit
72f137c650
@ -16,7 +16,7 @@ from calibre.db.annotations import unicode_normalize
|
|||||||
from .pool import Pool
|
from .pool import Pool
|
||||||
from .schema_upgrade import SchemaUpgrade
|
from .schema_upgrade import SchemaUpgrade
|
||||||
|
|
||||||
# TODO: check that closing of db connection works
|
# TODO: check that switching libraries with indexing enabled/disabled works
|
||||||
# TODO: db dump+restore
|
# TODO: db dump+restore
|
||||||
# TODO: calibre export/import
|
# TODO: calibre export/import
|
||||||
# TODO: check library and vacuuming of fts db
|
# TODO: check library and vacuuming of fts db
|
||||||
|
@ -48,6 +48,7 @@ def html_to_text(root):
|
|||||||
|
|
||||||
def to_text(container, name):
|
def to_text(container, name):
|
||||||
root = container.parsed(name)
|
root = container.parsed(name)
|
||||||
|
if hasattr(root, 'xpath'):
|
||||||
yield from html_to_text(root)
|
yield from html_to_text(root)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user