mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
use raw strings where possible to avoid escaping issues
This commit is contained in:
parent
80c6de0eb9
commit
b9fb80d9b0
@ -294,7 +294,7 @@ class FlowSplitter(object):
|
||||
body = self.get_body(root)
|
||||
if body is None:
|
||||
return False
|
||||
txt = re.sub(u'\\s+|\\xa0', '',
|
||||
txt = re.sub(ur'\s+|\xa0', '',
|
||||
etree.tostring(body, method='text', encoding='unicode'))
|
||||
if len(txt) > 1:
|
||||
return False
|
||||
|
@ -155,7 +155,7 @@ class PMLMLizer(object):
|
||||
|
||||
def get_anchor(self, page, aid):
|
||||
aid = self.get_anchor_id(page.href, aid)
|
||||
return u'\\Q="%s"' % aid
|
||||
return ur'\Q="%s"' % aid
|
||||
|
||||
def remove_newlines(self, text):
|
||||
text = text.replace('\r\n', ' ')
|
||||
@ -186,10 +186,10 @@ class PMLMLizer(object):
|
||||
anchors = set(re.findall(r'(?<=\\Q=").+?(?=")', text))
|
||||
links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
|
||||
for unused in anchors.difference(links):
|
||||
text = text.replace('\\Q="%s"' % unused, '')
|
||||
text = text.replace(r'\Q="%s"' % unused, '')
|
||||
|
||||
# Remove \Cn tags that are within \x and \Xn tags
|
||||
text = re.sub(unicode_type(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\\g<t>\\g<a>\\g<b>\\g<t>', text)
|
||||
text = re.sub(unicode_type(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), r'\g<t>\g<a>\g<b>\g<t>', text)
|
||||
|
||||
# Replace bad characters.
|
||||
text = text.replace(u'\xc2', '')
|
||||
@ -259,7 +259,7 @@ class PMLMLizer(object):
|
||||
'%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
|
||||
text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
|
||||
elif tag == 'hr':
|
||||
w = '\\w'
|
||||
w = r'\w'
|
||||
width = elem.get('width')
|
||||
if width:
|
||||
if not width.endswith('%'):
|
||||
@ -286,17 +286,17 @@ class PMLMLizer(object):
|
||||
toc_title, toc_depth = self.toc[toc_page].get(toc_x, (None, 0))
|
||||
if toc_title:
|
||||
toc_depth = max(min(toc_depth, 4), 0)
|
||||
text.append('\\C%s="%s"' % (toc_depth, toc_title))
|
||||
text.append(r'\C%s="%s"' % (toc_depth, toc_title))
|
||||
|
||||
# Process style information that needs only a single tag.
|
||||
# Commented out because every page in an OEB book starts with this style.
|
||||
if style['page-break-before'] == 'always':
|
||||
text.append('\\p')
|
||||
text.append(r'\p')
|
||||
|
||||
# Process basic PML tags.
|
||||
pml_tag = TAG_MAP.get(tag, None)
|
||||
if pml_tag and pml_tag not in tag_stack+tags:
|
||||
text.append('\\%s' % pml_tag)
|
||||
text.append(r'\%s' % pml_tag)
|
||||
tags.append(pml_tag)
|
||||
|
||||
# Special processing of tags that require an argument.
|
||||
@ -311,7 +311,7 @@ class PMLMLizer(object):
|
||||
if href not in self.link_hrefs.keys():
|
||||
self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
||||
href = '#%s' % self.link_hrefs[href]
|
||||
text.append('\\q="%s"' % href)
|
||||
text.append(r'\q="%s"' % href)
|
||||
tags.append('q')
|
||||
|
||||
# Anchor ids
|
||||
@ -325,14 +325,14 @@ class PMLMLizer(object):
|
||||
for s in STYLES:
|
||||
style_tag = s[1].get(style[s[0]], None)
|
||||
if style_tag and style_tag not in tag_stack+tags:
|
||||
text.append('\\%s' % style_tag)
|
||||
text.append(r'\%s' % style_tag)
|
||||
tags.append(style_tag)
|
||||
|
||||
# margin left
|
||||
try:
|
||||
mms = int(float(style['margin-left']) * 100 / style.height)
|
||||
if mms:
|
||||
text.append('\\T="%s%%"' % mms)
|
||||
text.append(r'\T="%s%%"' % mms)
|
||||
except:
|
||||
pass
|
||||
|
||||
@ -360,7 +360,7 @@ class PMLMLizer(object):
|
||||
# text.append('\n\n')
|
||||
|
||||
if style['page-break-after'] == 'always':
|
||||
text.append('\\p')
|
||||
text.append(r'\p')
|
||||
|
||||
# Process text after this tag but not within another.
|
||||
if hasattr(elem, 'tail') and elem.tail:
|
||||
@ -382,5 +382,5 @@ class PMLMLizer(object):
|
||||
if tag in ('c', 'r'):
|
||||
text.append('\n\\%s' % tag)
|
||||
else:
|
||||
text.append('\\%s' % tag)
|
||||
text.append(r'\%s' % tag)
|
||||
return text
|
||||
|
@ -17,7 +17,7 @@ htmlstrip = re.compile("<" # open
|
||||
|
||||
def clean_attributes(html):
|
||||
while htmlstrip.search(html):
|
||||
html = htmlstrip.sub('<\\1\\2>', html)
|
||||
html = htmlstrip.sub(r'<\1\2>', html)
|
||||
return html
|
||||
|
||||
|
||||
|
@ -83,7 +83,7 @@ def txt2rtf(text):
|
||||
for x in text:
|
||||
val = ord(x)
|
||||
if val == 160:
|
||||
buf.write(u'\\~')
|
||||
buf.write(ur'\~')
|
||||
elif val <= 127:
|
||||
buf.write(unicode_type(x))
|
||||
else:
|
||||
@ -115,7 +115,7 @@ class RTFMLizer(object):
|
||||
self.opts, self.opts.output_profile)
|
||||
self.currently_dumping_item = item
|
||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||
output += '{\\page }'
|
||||
output += r'{\page }'
|
||||
for item in self.oeb_book.spine:
|
||||
self.log.debug('Converting %s to RTF markup...' % item.href)
|
||||
# Removing comments is needed as comments with -- inside them can
|
||||
@ -127,7 +127,7 @@ class RTFMLizer(object):
|
||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
self.currently_dumping_item = item
|
||||
output += self.dump_text(content.find(XHTML('body')), stylizer)
|
||||
output += '{\\page }'
|
||||
output += r'{\page }'
|
||||
output += self.footer()
|
||||
output = self.insert_images(output)
|
||||
output = self.clean_text(output)
|
||||
@ -259,7 +259,7 @@ class RTFMLizer(object):
|
||||
block_start = ''
|
||||
block_end = ''
|
||||
if 'block' not in tag_stack:
|
||||
block_start = '{\\par\\pard\\hyphpar '
|
||||
block_start = r'{\par\pard\hyphpar '
|
||||
block_end = '}'
|
||||
text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
|
||||
|
||||
@ -292,7 +292,7 @@ class RTFMLizer(object):
|
||||
end_tag = tag_stack.pop()
|
||||
if end_tag != 'block':
|
||||
if tag in BLOCK_TAGS:
|
||||
text += u'\\par\\pard\\plain\\hyphpar}'
|
||||
text += ur'\par\pard\plain\hyphpar}'
|
||||
else:
|
||||
text += u'}'
|
||||
|
||||
@ -300,6 +300,6 @@ class RTFMLizer(object):
|
||||
if 'block' in tag_stack:
|
||||
text += '%s' % txt2rtf(elem.tail)
|
||||
else:
|
||||
text += '{\\par\\pard\\hyphpar %s}' % txt2rtf(elem.tail)
|
||||
text += r'{\par\pard\hyphpar %s}' % txt2rtf(elem.tail)
|
||||
|
||||
return text
|
||||
|
Loading…
x
Reference in New Issue
Block a user