diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 815d7e12fa..fb0d882de7 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -243,17 +243,17 @@ class Text(Element): if self.font_size_em == other.font_size_em \ and False \ and self.font.id == other.font.id \ - and re.match('$', self.raw) - m_other = re.match('^(.+)$', other.raw) + m_self = re.match(r'^(.+)$', self.raw) + m_other = re.match(r'^(.+)$', other.raw) if m_self and m_other: self.raw = m_self.group(1) other.raw = m_other.group(1) elif self.font_size_em != other.font_size_em \ and self.font_size_em != 1.00 : - if re.match(')(.+)$', self.raw) + m_self = re.match(r'^(.+em">)(.+)$', self.raw) self.raw = m_self.group(1) \ + '' \ + m_self.group(2) + '' @@ -289,7 +289,7 @@ class Text(Element): has_float = '' has_gap = 1 #else leave has_gap - old_float = re.match('^(.*)(.*)\s*$', self.raw) + old_float = re.match(r'^(.*)(.*)\s*$', self.raw) if old_float: # There is already a float as parts of a line are near the right. # Remove the and put it after this part @@ -322,7 +322,7 @@ class Text(Element): # Note that the 2 parts could have different font sizes matchObj = re.match(r'^([^<]*)(]*>)*(]+>)(.*)()*(\s*)$', self.raw) if matchObj is not None : - otherObj = re.match('^([^<]*)(]*>)*(]+>)(.*)()()*(.*)$', other.raw) + otherObj = re.match(r'^([^<]*)(]*>)*(]+>)(.*)()()*(.*)$', other.raw) # There is another href, but is it for the same place? if otherObj is not None and matchObj.group(3) == otherObj.group(3) : m2 = matchObj.group(2) @@ -983,7 +983,7 @@ class Page: # # The left can wander by a few (SAME_INDENT) pixels. # "float:left" occurs where there is a multi-line character, so indentation is messed up - lchar = re.match('.*([^ ])\s*$', first_text.text_as_string) + lchar = re.match(r'.*([^ ])\s*$', first_text.text_as_string) last_char = ' ' # Nothing interesting if lchar is not None: last_char = lchar.group(1) # Final non-space char @@ -1925,8 +1925,8 @@ class PDFDocument: else: merged_len = merged_text.right # Allow where the last line ends with or next line starts with lower case. - if re.match('.*[a-z, -]$', last_line.text_as_string) is not None \ - or re.match('^[a-z, -]', merged_text.text_as_string) is not None : + if re.match(r'.*[a-z, -]$', last_line.text_as_string) is not None \ + or re.match(r'^[a-z, -]', merged_text.text_as_string) is not None : merged_len = merged_text.right # To use merged_len etc. @@ -1936,8 +1936,8 @@ class PDFDocument: and 'href=' not in merged_text.raw \ and merged_text.left < stats_left + merged_text.average_character_width \ and not last_spare > merged_len \ - and not (re.match('.*[.!?](\u201d|”)$', last_line.text_as_string) is not None - and re.match('^(\u201c|“).*', merged_text.text_as_string) is not None): + and not (re.match(r'.*[.!?](\u201d|”)$', last_line.text_as_string) is not None + and re.match(r'^(\u201c|“).*', merged_text.text_as_string) is not None): merge_done = True # We don't want to merge partial pages # i.e. if this is the last line, preserve its top/bottom till after merge @@ -1970,7 +1970,7 @@ class PDFDocument: and (len(self.pages[pind+1].texts) == 0 \ or self.pages[pind+1].texts[0].top > self.pages[pind+1].imgs[0].top)): page.page_break_after = True - elif (re.match('.*[a-z, ]$', last_line.text_as_string) is not None \ + elif (re.match(r'.*[a-z, ]$', last_line.text_as_string) is not None \ or last_line.final_width > page.width*self.opts.unwrap_factor): # or (last_line.right * 100.0 / page.right_margin) > LAST_LINE_PERCENT): candidate = page