diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index d0e1a334ec..a42bae67a5 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -294,7 +294,7 @@ class FlowSplitter(object): body = self.get_body(root) if body is None: return False - txt = re.sub(u'\\s+|\\xa0', '', + txt = re.sub(ur'\s+|\xa0', '', etree.tostring(body, method='text', encoding='unicode')) if len(txt) > 1: return False diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 772117321c..e99ec66226 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -155,7 +155,7 @@ class PMLMLizer(object): def get_anchor(self, page, aid): aid = self.get_anchor_id(page.href, aid) - return u'\\Q="%s"' % aid + return ur'\Q="%s"' % aid def remove_newlines(self, text): text = text.replace('\r\n', ' ') @@ -186,10 +186,10 @@ class PMLMLizer(object): anchors = set(re.findall(r'(?<=\\Q=").+?(?=")', text)) links = set(re.findall(r'(?<=\\q="#).+?(?=")', text)) for unused in anchors.difference(links): - text = text.replace('\\Q="%s"' % unused, '') + text = text.replace(r'\Q="%s"' % unused, '') # Remove \Cn tags that are within \x and \Xn tags - text = re.sub(unicode_type(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\\g<t>\\g<a>\\g<b>\\g<t>', text) + text = re.sub(unicode_type(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), r'\g<t>\g<a>\g<b>\g<t>', text) # Replace bad characters. 
text = text.replace(u'\xc2', '') @@ -259,7 +259,7 @@ class PMLMLizer(object): '%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00') text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])]) elif tag == 'hr': - w = '\\w' + w = r'\w' width = elem.get('width') if width: if not width.endswith('%'): @@ -286,17 +286,17 @@ class PMLMLizer(object): toc_title, toc_depth = self.toc[toc_page].get(toc_x, (None, 0)) if toc_title: toc_depth = max(min(toc_depth, 4), 0) - text.append('\\C%s="%s"' % (toc_depth, toc_title)) + text.append(r'\C%s="%s"' % (toc_depth, toc_title)) # Process style information that needs holds a single tag. # Commented out because every page in an OEB book starts with this style. if style['page-break-before'] == 'always': - text.append('\\p') + text.append(r'\p') # Process basic PML tags. pml_tag = TAG_MAP.get(tag, None) if pml_tag and pml_tag not in tag_stack+tags: - text.append('\\%s' % pml_tag) + text.append(r'\%s' % pml_tag) tags.append(pml_tag) # Special processing of tags that require an argument. @@ -311,7 +311,7 @@ class PMLMLizer(object): if href not in self.link_hrefs.keys(): self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys()) href = '#%s' % self.link_hrefs[href] - text.append('\\q="%s"' % href) + text.append(r'\q="%s"' % href) tags.append('q') # Anchor ids @@ -325,14 +325,14 @@ class PMLMLizer(object): for s in STYLES: style_tag = s[1].get(style[s[0]], None) if style_tag and style_tag not in tag_stack+tags: - text.append('\\%s' % style_tag) + text.append(r'\%s' % style_tag) tags.append(style_tag) # margin left try: mms = int(float(style['margin-left']) * 100 / style.height) if mms: - text.append('\\T="%s%%"' % mms) + text.append(r'\T="%s%%"' % mms) except: pass @@ -360,7 +360,7 @@ class PMLMLizer(object): # text.append('\n\n') if style['page-break-after'] == 'always': - text.append('\\p') + text.append(r'\p') # Process text after this tag but not within another. 
if hasattr(elem, 'tail') and elem.tail: @@ -382,5 +382,5 @@ class PMLMLizer(object): if tag in ('c', 'r'): text.append('\n\\%s' % tag) else: - text.append('\\%s' % tag) + text.append(r'\%s' % tag) return text diff --git a/src/calibre/ebooks/readability/cleaners.py b/src/calibre/ebooks/readability/cleaners.py index 057fcf17b3..d30216c4d8 100644 --- a/src/calibre/ebooks/readability/cleaners.py +++ b/src/calibre/ebooks/readability/cleaners.py @@ -17,7 +17,7 @@ htmlstrip = re.compile("<" # open def clean_attributes(html): while htmlstrip.search(html): - html = htmlstrip.sub('<\\1\\2>', html) + html = htmlstrip.sub(r'<\1\2>', html) return html diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py index d4b339c53c..7f6cc91c50 100644 --- a/src/calibre/ebooks/rtf/rtfml.py +++ b/src/calibre/ebooks/rtf/rtfml.py @@ -83,7 +83,7 @@ def txt2rtf(text): for x in text: val = ord(x) if val == 160: - buf.write(u'\\~') + buf.write(ur'\~') elif val <= 127: buf.write(unicode_type(x)) else: @@ -115,7 +115,7 @@ class RTFMLizer(object): self.opts, self.opts.output_profile) self.currently_dumping_item = item output += self.dump_text(item.data.find(XHTML('body')), stylizer) - output += '{\\page }' + output += r'{\page }' for item in self.oeb_book.spine: self.log.debug('Converting %s to RTF markup...' 
% item.href) # Removing comments is needed as comments with -- inside them can @@ -127,7 +127,7 @@ class RTFMLizer(object): stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) self.currently_dumping_item = item output += self.dump_text(content.find(XHTML('body')), stylizer) - output += '{\\page }' + output += r'{\page }' output += self.footer() output = self.insert_images(output) output = self.clean_text(output) @@ -259,7 +259,7 @@ class RTFMLizer(object): block_start = '' block_end = '' if 'block' not in tag_stack: - block_start = '{\\par\\pard\\hyphpar ' + block_start = r'{\par\pard\hyphpar ' block_end = '}' text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end) @@ -292,7 +292,7 @@ class RTFMLizer(object): end_tag = tag_stack.pop() if end_tag != 'block': if tag in BLOCK_TAGS: - text += u'\\par\\pard\\plain\\hyphpar}' + text += ur'\par\pard\plain\hyphpar}' else: text += u'}' @@ -300,6 +300,6 @@ class RTFMLizer(object): if 'block' in tag_stack: text += '%s' % txt2rtf(elem.tail) else: - text += '{\\par\\pard\\hyphpar %s}' % txt2rtf(elem.tail) + text += r'{\par\pard\hyphpar %s}' % txt2rtf(elem.tail) return text