mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix #2100891 [[Enhancement] Full-text search for doc/docx/zip (containing htm/html)](https://bugs.launchpad.net/calibre/+bug/2100891)
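The previous change forgot to update the extractable-format check in the db cache: `dispatch_fts_jobs` still called `is_fmt_ok`, so formats that are only extractable as archives had their dirty FTS entry removed. It now calls `is_fmt_extractable` instead.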
This commit is contained in:
parent
00ddf587c7
commit
a7375ad7d4
@ -528,7 +528,7 @@ class Cache:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def dispatch_fts_jobs(queue, stop_dispatch, dbref):
|
def dispatch_fts_jobs(queue, stop_dispatch, dbref):
|
||||||
from .fts.text import is_fmt_ok
|
from .fts.text import is_fmt_extractable
|
||||||
|
|
||||||
def do_one():
|
def do_one():
|
||||||
self = dbref()
|
self = dbref()
|
||||||
@ -542,7 +542,7 @@ class Cache:
|
|||||||
if book_id is None:
|
if book_id is None:
|
||||||
return False
|
return False
|
||||||
path = self._format_abspath(book_id, fmt)
|
path = self._format_abspath(book_id, fmt)
|
||||||
if not path or not is_fmt_ok(fmt):
|
if not path or not is_fmt_extractable(fmt):
|
||||||
with self.write_lock:
|
with self.write_lock:
|
||||||
self.backend.remove_dirty_fts(book_id, fmt)
|
self.backend.remove_dirty_fts(book_id, fmt)
|
||||||
self._update_fts_indexing_numbers()
|
self._update_fts_indexing_numbers()
|
||||||
|
@ -79,20 +79,36 @@ def pdftotext(path):
|
|||||||
return clean_ascii_chars(raw).decode('utf-8', 'replace')
|
return clean_ascii_chars(raw).decode('utf-8', 'replace')
|
||||||
|
|
||||||
|
|
||||||
def extract_text(pathtoebook):
|
def can_extract_text(pathtoebook: str, input_fmt: str, exit_stack: contextlib.ExitStack) -> tuple[str, str]:
|
||||||
input_fmt = pathtoebook.rpartition('.')[-1].upper()
|
if not pathtoebook:
|
||||||
ans = ''
|
return pathtoebook, input_fmt
|
||||||
input_plugin = is_fmt_ok(input_fmt)
|
if is_fmt_ok(input_fmt):
|
||||||
with contextlib.ExitStack() as exit_stack:
|
return pathtoebook, input_fmt
|
||||||
if not input_plugin:
|
|
||||||
if input_fmt.lower() in ARCHIVE_FMTS:
|
if input_fmt.lower() in ARCHIVE_FMTS:
|
||||||
try:
|
try:
|
||||||
tdir = exit_stack.enter_context(TemporaryDirectory())
|
tdir = exit_stack.enter_context(TemporaryDirectory())
|
||||||
pathtoebook, input_fmt = unarchive(pathtoebook, tdir)
|
pathtoebook, input_fmt = unarchive(pathtoebook, tdir)
|
||||||
input_fmt = input_fmt.upper()
|
input_fmt = input_fmt.upper()
|
||||||
except Exception:
|
except Exception:
|
||||||
return ans
|
return '', input_fmt
|
||||||
else:
|
else:
|
||||||
|
return pathtoebook, input_fmt
|
||||||
|
return '', input_fmt
|
||||||
|
|
||||||
|
|
||||||
|
def is_fmt_extractable(input_fmt: str) -> bool:
|
||||||
|
if is_fmt_ok(input_fmt):
|
||||||
|
return True
|
||||||
|
return input_fmt.lower() in ARCHIVE_FMTS
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text(pathtoebook):
|
||||||
|
input_fmt = pathtoebook.rpartition('.')[-1].upper()
|
||||||
|
ans = ''
|
||||||
|
input_plugin = is_fmt_ok(input_fmt)
|
||||||
|
with contextlib.ExitStack() as exit_stack:
|
||||||
|
pathtoebook, input_fmt = can_extract_text(pathtoebook, input_fmt, exit_stack)
|
||||||
|
if not pathtoebook:
|
||||||
return ans
|
return ans
|
||||||
input_plugin = plugin_for_input_format(input_fmt)
|
input_plugin = plugin_for_input_format(input_fmt)
|
||||||
if input_fmt == 'PDF':
|
if input_fmt == 'PDF':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user