Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
use raw strings where possible to avoid escaping issues
This commit is contained in:
parent: 7f978f859c
commit: 6ca1739d6b
@@ -150,7 +150,7 @@ class OverDrive(Source):
|
|||||||
fix_slashes = re.compile(r'\\/')
|
fix_slashes = re.compile(r'\\/')
|
||||||
thumbimage = fix_slashes.sub('/', thumbimage)
|
thumbimage = fix_slashes.sub('/', thumbimage)
|
||||||
worldcatlink = fix_slashes.sub('/', worldcatlink)
|
worldcatlink = fix_slashes.sub('/', worldcatlink)
|
||||||
cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\\g<img>100', thumbimage)
|
cover_url = re.sub(r'(?P<img>(Ima?g(eType-)?))200', r'\g<img>100', thumbimage)
|
||||||
social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
|
social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
|
||||||
series_num = ''
|
series_num = ''
|
||||||
if not series:
|
if not series:
|
||||||
@@ -256,7 +256,7 @@ class OverDrive(Source):
|
|||||||
|
|
||||||
def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
||||||
close_matches = []
|
close_matches = []
|
||||||
raw = re.sub('.*?\\[\\[(?P<content>.*?)\\]\\].*', '[[\\g<content>]]', raw)
|
raw = re.sub(r'.*?\[\[(?P<content>.*?)\]\].*', r'[[\g<content>]]', raw)
|
||||||
results = json.loads(raw)
|
results = json.loads(raw)
|
||||||
# log.error('raw results are:'+str(results))
|
# log.error('raw results are:'+str(results))
|
||||||
# The search results are either from a keyword search or a multi-format list from a single ID,
|
# The search results are either from a keyword search or a multi-format list from a single ID,
|
||||||
|
@@ -356,13 +356,13 @@ class MobiReader(object):
|
|||||||
# Swap inline and block level elements, and order block level elements according to priority
|
# Swap inline and block level elements, and order block level elements according to priority
|
||||||
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
|
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\\g<para>'+'\\g<styletags>', self.processed_html)
|
r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', r'\g<para>'+r'\g<styletags>', self.processed_html)
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\\g<styletags>'+'\\g<para>', self.processed_html)
|
r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', r'\g<styletags>'+r'\g<para>', self.processed_html)
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\\g<para>'+'\\g<blockquote>', self.processed_html)
|
r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', r'\g<para>'+r'\g<blockquote>', self.processed_html)
|
||||||
self.processed_html = re.sub(
|
self.processed_html = re.sub(
|
||||||
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\\g<blockquote>'+'\\g<para>', self.processed_html)
|
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', r'\g<blockquote>'+r'\g<para>', self.processed_html)
|
||||||
bods = htmls = 0
|
bods = htmls = 0
|
||||||
for x in re.finditer(u'</body>|</html>', self.processed_html):
|
for x in re.finditer(u'</body>|</html>', self.processed_html):
|
||||||
if x == '</body>':
|
if x == '</body>':
|
||||||
@@ -699,7 +699,7 @@ class MobiReader(object):
|
|||||||
continue
|
continue
|
||||||
if reached and x.tag == 'a':
|
if reached and x.tag == 'a':
|
||||||
href = x.get('href', '')
|
href = x.get('href', '')
|
||||||
if href and re.match('\\w+://', href) is None:
|
if href and re.match(r'\w+://', href) is None:
|
||||||
try:
|
try:
|
||||||
text = u' '.join([t.strip() for t in
|
text = u' '.join([t.strip() for t in
|
||||||
x.xpath('descendant::text()')])
|
x.xpath('descendant::text()')])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user