From 26ae90f0f57d2386ca7a0cb4c60942556949bb64 Mon Sep 17 00:00:00 2001
From: MisterAP
Date: Thu, 7 Nov 2024 14:50:37 +0000
Subject: [PATCH] Divide-by-zero fix and header check fix

---
Review notes (text between '---' and the diffstat is not part of the commit):
- set_av_char_width() now clamps the divisor with max(len(...), 1). The 0.1
  floor keeps the result non-zero, but on its own it does not stop an empty
  text_as_string from raising ZeroDivisionError inside the method itself.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; check the context lines against reflow.py, or apply
  with `git apply --ignore-whitespace`.
- NOTE(review): some literals in context lines (the re.match pattern and the
  `self.raw += ''` string) look like they lost embedded markup in that copy;
  confirm against the repository before relying on them.

 src/calibre/ebooks/pdf/reflow.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 6b10b67f80..9d94698503 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -179,7 +179,7 @@ class Text(Element):
         self.raw = text.text if text.text else ''
         for x in text.iterchildren():
             self.raw += etree.tostring(x, method='xml', encoding='unicode')
-        self.average_character_width = self.width/len(self.text_as_string)
+        self.set_av_char_width()
 
     @property
     def is_empty(self):
@@ -195,6 +195,9 @@ class Text(Element):
             re.match(r'^\s*\s*\s*$', self.raw) is not None
         )
 
+    def set_av_char_width(self):
+        self.average_character_width = max(self.width/max(len(self.text_as_string), 1), 0.1)  # Never zero; empty text must not divide by zero
+
     def coalesce(self, other, page_number, left_margin, right_margin):
         if self.opts.verbose > 2:
             self.log.debug('Coalescing %r with %r on page %d'%(self.text_as_string,
@@ -352,7 +355,7 @@ class Text(Element):
         self.raw += other.raw
         if has_float:
             self.raw += ''
-        self.average_character_width = self.width/len(self.text_as_string)
+        self.set_av_char_width()
         #self.last_left = other.left
 
     def to_html(self):
@@ -390,7 +393,7 @@ class Paragraph(Text):
         self.raw = text.text if text.text else ''
         for x in text.iterchildren():
             self.raw += etree.tostring(x, method='xml', encoding='unicode')
-        self.average_character_width = self.width/len(self.text_as_string)
+        self.set_av_char_width()
 
     def to_html(self):
         return self.raw
@@ -1832,7 +1835,8 @@ class PDFDocument:
         for i in range(LINE_SCAN_COUNT):
             if head_match[i] > pages_to_scan or head_match1[i] > pages_to_scan:
                 head_ind = i  # Remember the last matching line
-        if head_match[head_ind] > pages_to_scan or head_match1[head_ind] > pages_to_scan:
+        if self.pages[head_page].texts \
+          and (head_match[head_ind] > pages_to_scan or head_match1[head_ind] > pages_to_scan):
             t = self.pages[head_page].texts[head_ind]
             head_skip = t.top + t.height + 1
 
@@ -1840,7 +1844,8 @@ class PDFDocument:
         for i in range(LINE_SCAN_COUNT):
             if foot_match[i] > pages_to_scan or foot_match1[i] > pages_to_scan:
                 foot_ind = i  # Remember the last matching line
-        if foot_match[foot_ind] > pages_to_scan or foot_match1[foot_ind] > pages_to_scan:
+        if self.pages[foot_page].texts \
+          and (foot_match[foot_ind] > pages_to_scan or foot_match1[foot_ind] > pages_to_scan):
             t = self.pages[foot_page].texts[-foot_ind-1]
             foot_skip = t.top - 1
 