mirror of
https://github.com/kovidgoyal/calibre.git
synced 2026-04-27 19:29:52 -04:00
Specialize page counting for text files
This commit is contained in:
parent
d992305e8e
commit
d1cd49d8a9
@ -1742,7 +1742,8 @@ class Cache:
|
||||
ans: Pages | None = None
|
||||
with self.safe_read_lock:
|
||||
for pages, algorithm, format, format_size, timestamp in self.backend.execute(
|
||||
f'SELECT pages,algorithm,format,format_size,timestamp FROM books_pages_link WHERE book={book_id:d}'):
|
||||
f'SELECT pages,algorithm,format,format_size,timestamp FROM books_pages_link WHERE book={book_id:d} LIMIT 1'
|
||||
):
|
||||
ans = Pages(int(pages), int(algorithm), str(format), int(format_size), parse_iso8601(timestamp, assume_utc=True))
|
||||
break
|
||||
if queue_if_unavailable and ans is None:
|
||||
@ -1757,7 +1758,7 @@ class Cache:
|
||||
'''
|
||||
if book_id <= 0:
|
||||
if len(self.fields['pages'].table.book_col_map) < len(self.fields['uuid'].table.book_col_map):
|
||||
self.backend.execute('INSERT OR IGNORE INTO books_pages_link(book) SELECT id FROM books')
|
||||
self.backend.execute('INSERT OR IGNORE INTO books_pages_link(book,needs_scan) SELECT id,1 FROM books')
|
||||
else:
|
||||
self.backend.execute(f'UPDATE books_pages_link SET needs_scan=1 WHERE book={int(book_id)}')
|
||||
self.maintain_page_counts.queue_scan(book_id)
|
||||
@ -2109,7 +2110,6 @@ class Cache:
|
||||
needs_close = True
|
||||
try:
|
||||
size, fname = self._do_add_format(book_id, fmt, stream, name)
|
||||
self.queue_pages_scan()
|
||||
finally:
|
||||
if needs_close:
|
||||
stream.close()
|
||||
|
||||
@ -9,6 +9,8 @@ from contextlib import closing, suppress
|
||||
from multiprocessing import Pipe
|
||||
from operator import itemgetter
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre import detect_ncpus
|
||||
from calibre.constants import iswindows
|
||||
from calibre.ebooks.oeb.base import XHTML
|
||||
@ -65,8 +67,11 @@ def get_length(root):
|
||||
return ans
|
||||
|
||||
|
||||
CHARS_PER_PAGE = 1000
|
||||
|
||||
|
||||
def get_page_count(root):
    '''
    Return the estimated number of pages for ``root``.

    The estimate is the value reported by get_length() for ``root``
    integer-divided by the module level CHARS_PER_PAGE constant.
    '''
    # Use the shared constant rather than a literal so every page estimate in
    # this module is based on the same characters-per-page value.
    return get_length(root) // CHARS_PER_PAGE
|
||||
|
||||
|
||||
def calculate_number_of_workers(names, in_process_container, max_workers):
|
||||
@ -114,6 +119,14 @@ def count_pages_oeb(pathtoebook: str, tdir: str, executor: Executor | None = Non
|
||||
return sum(executor.map(process, paths))
|
||||
|
||||
|
||||
def count_pages_txt(pathtoebook: str) -> int:
    '''
    Estimate the number of pages in the text file at ``pathtoebook``.

    The file is read as bytes and decoded as UTF-8, with undecodable bytes
    replaced. The decoded text is attached to a throwaway element so that
    get_num_of_significant_chars() can count it, and the count is
    integer-divided by CHARS_PER_PAGE.
    '''
    with open(pathtoebook, 'rb') as f:
        text = f.read().decode('utf-8', 'replace')
    # lxml raises ValueError when given a string containing NUL bytes, other
    # control characters (apart from tab, newline and carriage return) or the
    # U+FFFE/U+FFFF non-characters. Plain text files can legitimately contain
    # these (form feeds, for example), so drop them before handing the text
    # to lxml instead of failing the whole page count.
    xml_incompatible = dict.fromkeys(
        (*range(0x09), 0x0B, 0x0C, *range(0x0E, 0x20), 0xFFFE, 0xFFFF))
    e = etree.Element('r')
    e.tail = text.translate(xml_incompatible)
    return get_num_of_significant_chars(e) // CHARS_PER_PAGE
|
||||
|
||||
|
||||
def count_pages(pathtoebook: str, executor: Executor | None = None) -> int:
|
||||
ext = pathtoebook.rpartition('.')[-1].lower()
|
||||
match ext:
|
||||
@ -125,6 +138,8 @@ def count_pages(pathtoebook: str, executor: Executor | None = None) -> int:
|
||||
return count_pages_cbr(pathtoebook)
|
||||
case 'cb7':
|
||||
return count_pages_cb7(pathtoebook)
|
||||
case 'txt' | 'text' | 'md' | 'textile' | 'markdown':
|
||||
return count_pages_txt(pathtoebook)
|
||||
case _:
|
||||
with TemporaryDirectory() as tdir:
|
||||
return count_pages_oeb(pathtoebook, tdir, executor=executor)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user