use raw strings where possible to avoid escaping issues

This commit is contained in:
Eli Schwartz 2019-06-26 14:31:37 -04:00
parent 7f978f859c
commit 6ca1739d6b
No known key found for this signature in database
GPG Key ID: CEB167EFB5722BD6
2 changed files with 7 additions and 7 deletions

View File

@@ -150,7 +150,7 @@ class OverDrive(Source):
fix_slashes = re.compile(r'\\/')
thumbimage = fix_slashes.sub('/', thumbimage)
worldcatlink = fix_slashes.sub('/', worldcatlink)
cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\\g<img>100', thumbimage)
cover_url = re.sub(r'(?P<img>(Ima?g(eType-)?))200', r'\g<img>100', thumbimage)
social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
series_num = ''
if not series:
@@ -256,7 +256,7 @@ class OverDrive(Source):
def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
close_matches = []
raw = re.sub('.*?\\[\\[(?P<content>.*?)\\]\\].*', '[[\\g<content>]]', raw)
raw = re.sub(r'.*?\[\[(?P<content>.*?)\]\].*', r'[[\g<content>]]', raw)
results = json.loads(raw)
# log.error('raw results are:'+str(results))
# The search results are either from a keyword search or a multi-format list from a single ID,

View File

@@ -356,13 +356,13 @@ class MobiReader(object):
# Swap inline and block level elements, and order block level elements according to priority
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
self.processed_html = re.sub(
r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\\g<para>'+'\\g<styletags>', self.processed_html)
r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', r'\g<para>'+r'\g<styletags>', self.processed_html)
self.processed_html = re.sub(
r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\\g<styletags>'+'\\g<para>', self.processed_html)
r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', r'\g<styletags>'+r'\g<para>', self.processed_html)
self.processed_html = re.sub(
r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\\g<para>'+'\\g<blockquote>', self.processed_html)
r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', r'\g<para>'+r'\g<blockquote>', self.processed_html)
self.processed_html = re.sub(
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\\g<blockquote>'+'\\g<para>', self.processed_html)
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', r'\g<blockquote>'+r'\g<para>', self.processed_html)
bods = htmls = 0
for x in re.finditer(u'</body>|</html>', self.processed_html):
if x == '</body>':
@@ -699,7 +699,7 @@ class MobiReader(object):
continue
if reached and x.tag == 'a':
href = x.get('href', '')
if href and re.match('\\w+://', href) is None:
if href and re.match(r'\w+://', href) is None:
try:
text = u' '.join([t.strip() for t in
x.xpath('descendant::text()')])