always use raw-string for regex (auto-fix)

ruff rule 'RUF039'
un-pogaz 2025-01-24 11:14:20 +01:00
parent 567a0187f3
commit ac6912565a
97 changed files with 315 additions and 314 deletions
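
What RUF039 enforces, as a minimal sketch (the sample strings here are illustrative, not from the diff): in a plain string literal Python consumes backslash escapes itself before the regex engine ever sees the pattern, so something like '\d+' works only because unknown escapes happen to pass through, and recent CPython (3.12+) emits a SyntaxWarning for them; a raw string hands every backslash to re verbatim.

import re

# Plain-string patterns with backslashes ('\d+') trigger the warning;
# the raw form delivers the backslash to the regex engine unchanged.
pattern = r'\d+'
print(re.findall(pattern, '315 additions, 314 deletions'))  # ['315', '314']

# For backslash-free patterns the two spellings compile identically;
# RUF039 still prefers the raw form, which is why most hunks below
# merely gain an r prefix.
assert re.compile('flyer').pattern == re.compile(r'flyer').pattern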


@ -49,7 +49,7 @@ def merge():
clone_node(child, symbol)
ans.append(symbol)
ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
ans = re.sub(r'<svg[^>]+>', '<svg style="display:none">', ans, count=1)
return ans


@ -29,6 +29,6 @@ class AlejaKomiksu(BasicNewsRecipe):
def skip_ad_pages(self, soup):
tag = soup.find(attrs={'class': 'rodzaj'})
if tag and tag.a.string.lower().strip() == 'recenzje':
link = soup.find(text=re.compile('recenzuje'))
link = soup.find(text=re.compile(r'recenzuje'))
if link:
return self.index_to_soup(link.parent['href'], raw=True)


@ -63,12 +63,12 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
dict(
attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
dict(name='img', attrs={'alt': 'logo'}),
dict(name='div', attrs={'class': re.compile('infoEl')}),
dict(name='span', attrs={'class': re.compile('loupe')})
dict(name='div', attrs={'class': re.compile(r'infoEl')}),
dict(name='span', attrs={'class': re.compile(r'loupe')})
]
remove_tags_after = [
dict(name='div', attrs={'itemprop': re.compile('articleBody')})
dict(name='div', attrs={'itemprop': re.compile(r'articleBody')})
]
def preprocess_html(self, soup):


@ -58,7 +58,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghammail.co.uk')
cov = soup.find(attrs={'src': re.compile(
'http://images.icnetwork.co.uk/upl/birm')})
r'http://images.icnetwork.co.uk/upl/birm')})
cov = str(cov)
cov2 = re.findall(
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)


@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}),
dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):


@ -39,12 +39,12 @@ class CSMonitor(BasicNewsRecipe):
}
remove_tags = [
dict(name=['meta', 'link', 'iframe', 'object', 'embed']), dict(attrs={'class': re.compile('(^|| )podStoryRel($|| )', re.DOTALL)}), dict(
dict(name=['meta', 'link', 'iframe', 'object', 'embed']), dict(attrs={'class': re.compile(r'(^|| )podStoryRel($|| )', re.DOTALL)}), dict(
attrs={'class': ['bottom-rel', 'hide']}), dict(attrs={'id': ['pgallerycarousel_enlarge', 'pgallerycarousel_related']})
]
keep_only_tags = [
dict(name='h1', attrs={'class': 'head'}), dict(name='h2', attrs={'class': 'subhead'}), dict(attrs={'class': [
'sByline', 'thePhoto', 'ui-body-header']}), dict(attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
'sByline', 'thePhoto', 'ui-body-header']}), dict(attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
]
remove_attributes = ['xmlns:fb']
@ -74,11 +74,11 @@ class CSMonitor(BasicNewsRecipe):
nurl = 'http://www.csmonitor.com' + nexttag['href']
soup2 = self.index_to_soup(nurl)
texttag = soup2.find(
attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
if texttag:
appendtag = soup.find(
attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
for citem in texttag.findAll(attrs={'class': [re.compile('(^|| )podStoryRel($|| )', re.DOTALL), 'bottom-rel', 'hide']}):
attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
for citem in texttag.findAll(attrs={'class': [re.compile(r'(^|| )podStoryRel($|| )', re.DOTALL), 'bottom-rel', 'hide']}):
citem.extract()
self.append_page(soup2)
texttag.extract()


@ -47,7 +47,7 @@ class Chronicle(BasicNewsRecipe):
# Find cover
cover = soup0.find('div', attrs={
'class': 'side-content'}).find(attrs={'src': re.compile('photos/biz/Current')})
'class': 'side-content'}).find(attrs={'src': re.compile(r'photos/biz/Current')})
if cover is not None:
if 'chronicle.com' in cover['src']:
self.cover_url = cover['src']


@ -86,7 +86,7 @@ class CourrierInternational(BasicNewsRecipe):
return br
def preprocess_html(self, soup):
for link in soup.findAll('a', href=re.compile('^/')):
for link in soup.findAll('a', href=re.compile(r'^/')):
link['href'] = 'http://www.courrierinternational.com' + link['href']
return soup


@ -71,10 +71,10 @@ class AdvancedUserRecipe1467571059(BasicNewsRecipe):
remove_tags = [
dict(name=['embed', 'object']),
dict(name='div', attrs={'class':['note NotePortrait', 'note']}),
dict(name='ul', attrs={'class':re.compile('article__share')}),
dict(name='ul', attrs={'class':re.compile(r'article__share')}),
dict(name='div', attrs={'class':'slideshow__controls'}),
dict(name='a', attrs={'role':'button'}),
dict(name='figure', attrs={'class':re.compile('video')})
dict(name='figure', attrs={'class':re.compile(r'video')})
]
remove_attributes = ['width', 'height']


@ -31,9 +31,9 @@ class deredactie(BasicNewsRecipe):
catnames = {}
soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch')
for elem in soup.findAll('li', attrs={'id': re.compile('^navItem[2-9]')}):
for elem in soup.findAll('li', attrs={'id': re.compile(r'^navItem[2-9]')}):
a = elem.find('a', href=True)
m = re.search('(?<=/)[^/]*$', a['href'])
m = re.search(r'(?<=/)[^/]*$', a['href'])
cat = str(m.group(0))
categories.append(cat)
catnames[cat] = a['title']
@ -45,7 +45,7 @@ class deredactie(BasicNewsRecipe):
articles = []
soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat)
for a in soup.findAll('a', attrs={'href': re.compile('deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}):
for a in soup.findAll('a', attrs={'href': re.compile(r'deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}):
skip_this_article = False
url = a['href'].strip()
if url.startswith('/'):


@ -51,7 +51,7 @@ class Donga(BasicNewsRecipe):
# https://www.donga.com/news/[sections]/article/all/[date]/[gid]/1
# Return print version url with syntax:
# https://www.donga.com/news/View?gid=[gid]&date=[date]
reobject = re.search('(?<=/all/)([0-9]*)/([0-9]*)', url)
reobject = re.search(r'(?<=/all/)([0-9]*)/([0-9]*)', url)
date = reobject.group(1)
gid = reobject.group(2)
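
A quick check of the lookbehind above against a URL of the documented shape (this sample URL is invented):

import re

url = 'https://www.donga.com/news/Politics/article/all/20250124/123456789/1'
m = re.search(r'(?<=/all/)([0-9]*)/([0-9]*)', url)
print(m.group(1), m.group(2))  # 20250124 123456789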


@ -33,7 +33,7 @@ class dwutygodnik(BasicNewsRecipe):
browser.open('http://www.dwutygodnik.com/')
# find the link
epublink = browser.find_link(text_regex=re.compile('Wydanie EPUB'))
epublink = browser.find_link(text_regex=re.compile(r'Wydanie EPUB'))
# download ebook
self.report_progress(0, _('Downloading ePUB'))


@ -21,8 +21,8 @@ class Dziennik_pl(BasicNewsRecipe):
remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'}
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}'
preprocess_regexps = [(re.compile('Komentarze:'), lambda m: ''), (re.compile(
'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
preprocess_regexps = [(re.compile(r'Komentarze:'), lambda m: ''), (re.compile(
r'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
keep_only_tags = [dict(id='article')]
remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa: E501
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),


@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}),
dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):


@ -51,7 +51,7 @@ class Esensja(BasicNewsRecipe):
def parse_index(self):
soup = self.index_to_soup('http://www.esensja.pl/magazyn/')
a = soup.find('a', attrs={'href': re.compile('.*/index.html')})
a = soup.find('a', attrs={'href': re.compile(r'.*/index.html')})
year = a['href'].split('/')[0]
month = a['href'].split('/')[1]
self.HREF = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/iso/'
@ -149,7 +149,7 @@ class Esensja(BasicNewsRecipe):
info = tag.find(attrs={'class': 'img_info'})
text = str(tag)
if not src:
src = re.search('src="[^"]*?"', text)
src = re.search(r'src="[^"]*?"', text)
if src:
src = src.group(0)
src = src[5:].replace('//', '/')


@ -95,7 +95,7 @@ class EsensjaRSS(BasicNewsRecipe):
info = tag.find(attrs={'class': 'img_info'})
text = str(tag)
if not src:
src = re.search('src="[^"]*?"', text)
src = re.search(r'src="[^"]*?"', text)
if src:
src = src.group(0)
src = src[5:].replace('//', '/')


@ -109,7 +109,7 @@ img { background: none !important; float: none; margin: 0px; }
for post in soup.findAll('a'):
strpost = str(post)
if re.match('<a href="https://www1.folha.uol.com.br/.*/"><svg aria-hidden="true" class="icon icon--star"', strpost):
if re.match(r'<a href="https://www1.folha.uol.com.br/.*/"><svg aria-hidden="true" class="icon icon--star"', strpost):
if articles:
feeds.append((section_title, articles))
self.log()


@ -39,7 +39,7 @@ class AdvancedUserRecipe1515196393(BasicNewsRecipe):
feeds = []
br = self.get_browser()
self.ctdir = PersistentTemporaryDirectory()
for x in toc.findAll(['li'], attrs={'class': re.compile('.*get_content.*')}):
for x in toc.findAll(['li'], attrs={'class': re.compile(r'.*get_content.*')}):
edwo = x.find('a')
title = self.tag_to_string(edwo)
self.log('\t\tFound article:', title)


@ -54,7 +54,7 @@ class GN(BasicNewsRecipe):
}]
feeds.append((u'Na dobry początek', articles))
# columns:
for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}):
for addr in soup.findAll('a', attrs={'href': re.compile(r'kategoria')}):
if not addr.span:
main_block = self.index_to_soup(
'http://www.gosc.pl' + addr['href'])


@ -50,7 +50,7 @@ class GN(BasicNewsRecipe):
}]
feeds.append((u'Na dobry początek', articles))
# columns:
for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}):
for addr in soup.findAll('a', attrs={'href': re.compile(r'kategoria')}):
if not addr.span:
main_block = self.index_to_soup(
'http://www.gosc.pl' + addr['href'])


@ -50,10 +50,10 @@ class GazetvanAntwerpen(BasicNewsRecipe):
remove_tags = [
dict(name=['embed', 'object']),
dict(name='div', attrs={'class': ['note NotePortrait', 'note']}),
dict(name='ul', attrs={'class': re.compile('article__share')}),
dict(name='ul', attrs={'class': re.compile(r'article__share')}),
dict(name='div', attrs={'class': 'slideshow__controls'}),
dict(name='a', attrs={'role': 'button'}),
dict(name='figure', attrs={'class': re.compile('video')})
dict(name='figure', attrs={'class': re.compile(r'video')})
]
remove_attributes = ['width', 'height']


@ -78,7 +78,7 @@ class HNWithCommentsLink(BasicNewsRecipe):
br = td.find('br')
if br:
br.extract()
reply = td.find('a', attrs={'href': re.compile('^reply?')})
reply = td.find('a', attrs={'href': re.compile(r'^reply?')})
if reply:
reply.parent.extract()
td.name = 'div'


@ -59,7 +59,7 @@ class Handelsblatt(BasicNewsRecipe):
dict(name='aside', attrs={'class': ['vhb-article-element vhb-left',
'vhb-article-element vhb-left vhb-teasergallery',
'vhb-article-element vhb-left vhb-shorttexts']}),
dict(name='aside', attrs={'class': re.compile('vhb-club-events')}),
dict(name='aside', attrs={'class': re.compile(r'vhb-club-events')}),
dict(name='article', attrs={'class': ['vhb-imagegallery vhb-teaser',
'vhb-teaser vhb-type-video']}),
dict(name='small', attrs={'class': ['vhb-credit']}),
@ -70,14 +70,14 @@ class Handelsblatt(BasicNewsRecipe):
'opinary-widget-wrapper',
'vhb-article__content-element--shorttextgallery',
'vhb-hollow-area vhb-hollow-area--col-1']}),
dict(name='div', attrs={'class': re.compile('stepstone')}),
dict(name='div', attrs={'class': re.compile('vhb-imagegallery')}),
dict(name='div', attrs={'class': re.compile(r'stepstone')}),
dict(name='div', attrs={'class': re.compile(r'vhb-imagegallery')}),
dict(name='div', attrs={'id': ['highcharts_infografik']}),
dict(name='div', attrs={'id': re.compile('dax-sentiment')}),
dict(name=['div', 'section'], attrs={'class': re.compile('slider')}),
dict(name='div', attrs={'id': re.compile(r'dax-sentiment')}),
dict(name=['div', 'section'], attrs={'class': re.compile(r'slider')}),
dict(name='a', attrs={'class': ['twitter-follow-button']}),
dict(name='img', attrs={'class': ['highlight-icon', 'lb-author__avatar', 'pin-icon']}),
dict(name='img', attrs={'alt': re.compile('Handelsblatt Morning Briefing')}),
dict(name='img', attrs={'alt': re.compile(r'Handelsblatt Morning Briefing')}),
dict(name=['blockquote', 'button', 'link'])
]
@ -138,7 +138,7 @@ class Handelsblatt(BasicNewsRecipe):
def postprocess_html(self, soup, first_fetch):
# convert lists of author(s) and date(s) into simple text
for cap in soup.find_all('div', {'class': re.compile('vhb-article-caption')}):
for cap in soup.find_all('div', {'class': re.compile(r'vhb-article-caption')}):
cap.replace_with(cap.encode_contents().decode('utf-8').strip() + ' ')
for row in soup.find_all('div', {'class': 'vhb-article-author-row'}):
for ul in row.find_all('ul'):
@ -160,7 +160,7 @@ class Handelsblatt(BasicNewsRecipe):
fig.find('div', {'class': 'vhb-caption'}).replace_with(cap)
# remove references to related articles
for strong in soup.find_all('strong'):
if strong.string and (re.match('^Mehr:? ?', strong.string) or re.match('^>>.*', strong.string)):
if strong.string and (re.match(r'^Mehr:? ?', strong.string) or re.match(r'^>>.*', strong.string)):
p_parent = strong.find_parent('p')
if p_parent:
p_parent.decompose()


@ -49,7 +49,7 @@ class HistoryToday(BasicNewsRecipe):
# Go to issue
soup = self.index_to_soup('https://www.historytoday.com/contents')
cover = soup.find('div', attrs={
'id': 'content-area'}).find('img', attrs={'src': re.compile('.*cover.*')})['src']
'id': 'content-area'}).find('img', attrs={'src': re.compile(r'.*cover.*')})['src']
self.cover_url = cover
self.log(self.cover_url)


@ -89,7 +89,7 @@ class IndiaToday(BasicNewsRecipe):
return soup
def preprocess_raw_html(self, raw, *a):
m = re.search('id="__NEXT_DATA__" type="application/json">', raw)
m = re.search(r'id="__NEXT_DATA__" type="application/json">', raw)
raw = raw[m.start():]
raw = raw.split('>', 1)[1]
data = json.JSONDecoder().raw_decode(raw)[0]


@ -36,7 +36,7 @@ class JoopRecipe(BasicNewsRecipe):
keep_only_tags.append(
dict(name='h2', attrs={'class': 'columnhead smallline'}))
keep_only_tags.append(
dict(name='div', attrs={'class': re.compile('article.*')}))
dict(name='div', attrs={'class': re.compile(r'article.*')}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}


@ -44,16 +44,16 @@ class Kurier(BasicNewsRecipe):
]
keep_only_tags = [
dict(name='article', attrs={'class': re.compile('main-article')})
dict(name='article', attrs={'class': re.compile(r'main-article')})
]
remove_tags = [
dict(name='div', attrs={'class': 'social-media-container'}),
dict(name='section', attrs={'class': 'tags'}),
dict(name='section', attrs={'class': re.compile('comment-box')}),
dict(name='section', attrs={'class': re.compile('related-content')}),
dict(name='section', attrs={'class': re.compile('article-slider')}),
dict(name='section', attrs={'class': re.compile('commentcontainer')}),
dict(name='section', attrs={'class': re.compile(r'comment-box')}),
dict(name='section', attrs={'class': re.compile(r'related-content')}),
dict(name='section', attrs={'class': re.compile(r'article-slider')}),
dict(name='section', attrs={'class': re.compile(r'commentcontainer')}),
dict(name='blockquote')
]


@ -21,7 +21,7 @@ class Kyungyhang(BasicNewsRecipe):
remove_javascript = True
preprocess_regexps = [
(re.compile("<div class='ad_movFocus'.*</html>",
(re.compile(r"<div class='ad_movFocus'.*</html>",
re.DOTALL | re.IGNORECASE), lambda match: '</html>'),
]


@ -121,7 +121,7 @@ class LeMondeAbonne(BasicNewsRecipe):
files = os.listdir(path)
nb_index_files = len([
name for name in files if re.match('frame_gauche_[0-9]+.html', name)
name for name in files if re.match(r'frame_gauche_[0-9]+.html', name)
])
flux = []


@ -144,7 +144,7 @@ class WeeklyLWN(BasicNewsRecipe):
# Most articles have anchors in their titles, *except* the
# security vulnerabilities
article_anchor = curr.find(
name='a', attrs={'href': re.compile('^/Articles/')})
name='a', attrs={'href': re.compile(r'^/Articles/')})
if article_anchor:
article_url = article_anchor.get('href')


@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}),
dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):


@ -71,21 +71,21 @@ class Newsweek(BasicNewsRecipe):
strong = p.find('strong')
if strong:
newest = re.compile(
'Tekst pochodzi z najnowszego numeru Tygodnika Newsweek')
r'Tekst pochodzi z najnowszego numeru Tygodnika Newsweek')
if newest.search(str(strong)):
strong.extract()
continue
itunes = p.find('a')
if itunes:
reurl = re.compile('itunes.apple.com')
reurl = re.compile(r'itunes.apple.com')
if reurl.search(str(itunes['href'])):
p.extract()
continue
imagedesc = p.find('div', attrs={'class': 'image-desc'})
if imagedesc:
redesc = re.compile('Okładka numeru')
redesc = re.compile(r'Okładka numeru')
if (redesc.search(str(imagedesc))):
p.extract()
continue


@ -77,10 +77,10 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
print('-------------------------get index of paper--------------------------------')
result = []
soup = self.index_to_soup('http://www.nikkei.com/paper/')
sections = soup.findAll(attrs={'class': re.compile('.*cmn-article_title.*')})
sections = soup.findAll(attrs={'class': re.compile(r'.*cmn-article_title.*')})
for sect in sections:
sect_title = sect.find(attrs={'class' : re.compile('.*cmnc-((large)|(middle)|(small)).*')})
sect_title = sect.find(attrs={'class' : re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
if sect_title is None:
continue
sect_title = sect_title.contents[0]


@ -62,7 +62,7 @@ class NRCNext(BasicNewsRecipe):
zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
zfile.extractall(self.output_dir)
namelist = zfile.namelist()
emre = re.compile('&lt;em(?:.*)&gt;(.*)&lt;/em&gt;')
emre = re.compile(r'&lt;em(?:.*)&gt;(.*)&lt;/em&gt;')
subst = '\\1'
for name in namelist:
_, ext = os.path.splitext(name)


@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}),
dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):


@ -48,7 +48,7 @@ class outlook(BasicNewsRecipe):
return [('Articles', ans)]
def preprocess_raw_html(self, raw, *a):
m = re.search('id="__NEXT_DATA__" type="application/json">', raw)
m = re.search(r'id="__NEXT_DATA__" type="application/json">', raw)
raw = raw[m.start():]
raw = raw.split('>', 1)[1]
data = json.JSONDecoder().raw_decode(raw)[0]


@ -41,9 +41,9 @@ class Polter(BasicNewsRecipe):
(u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')]
def preprocess_html(self, soup):
for s in soup.findAll(attrs={'style': re.compile('float: ?left')}):
for s in soup.findAll(attrs={'style': re.compile(r'float: ?left')}):
s['class'] = 'floatleft'
for s in soup.findAll(attrs={'style': re.compile('float: ?right')}):
for s in soup.findAll(attrs={'style': re.compile(r'float: ?right')}):
s['class'] = 'floatright'
for s in soup.findAll(style=True):
if 'bold;' in s['style']:


@ -161,9 +161,9 @@ class PrivateEyeRecipe(BasicNewsRecipe):
{'name': 'div', 'attrs': {'id': 'about-covers'}},
{'name': 'a', ' attrs': {'href': 'https://shop.private-eye.co.uk'}},
{'name': 'iframe'},
{'name': 'link', 'attrs': {'href': re.compile('/javastyle/lightbox/')}},
{'name': 'link', 'attrs': {'href': re.compile('/javastyle/news_ticker/')}},
{'name': 'link', 'attrs': {'href': re.compile('/javastyle/media-queries-')}},
{'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/lightbox/')}},
{'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/news_ticker/')}},
{'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/media-queries-')}},
]
# Convert headers to h1, strapline to h4


@ -54,7 +54,7 @@ class ScienceNewsIssue(BasicNewsRecipe):
# Get articles
soup = self.index_to_soup(url)
soup = soup.find('main', attrs={'id':'content'})
re_article = re.compile('https://www.sciencenews.org/article/')
re_article = re.compile(r'https://www.sciencenews.org/article/')
stories = []
past_urls = set()
for sec in soup.find_all(href=re_article):


@ -76,8 +76,8 @@ class SolHaberRecipe(BasicNewsRecipe):
result = []
articles_dict = {}
author_regexp = re.compile('^http://.*?/yazarlar/(.*?)/.*$')
category_regexp = re.compile('^http://.*?/(.+?)/.*$')
author_regexp = re.compile(r'^http://.*?/yazarlar/(.*?)/.*$')
category_regexp = re.compile(r'^http://.*?/(.+?)/.*$')
for section_tuple in self.section_tuples:


@ -43,7 +43,7 @@ class StandardMediaKeRecipe(BasicNewsRecipe):
def print_version(self, url):
import re
p = re.compile('http://www.standardmedia.co.ke/.*InsidePage.php')
p = re.compile(r'http://www.standardmedia.co.ke/.*InsidePage.php')
return p.sub('http://www.standardmedia.co.ke/print.php', url)
def preprocess_html(self, soup):


@ -89,7 +89,7 @@ class TheAge(BasicNewsRecipe):
for i in soup.findAll('a'):
href = i['href']
if href and re.match('http://www.theage.com.au/frontpage/[0-9]+/[0-9]+/[0-9]+/frontpage.pdf', href):
if href and re.match(r'http://www.theage.com.au/frontpage/[0-9]+/[0-9]+/[0-9]+/frontpage.pdf', href):
return href
return None


@ -92,7 +92,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
# 1. Title. By author
#.2. Title by author: subtitle
# 3. Title: author: subtitle
title_author_re = re.compile('^(.*?)(?:(?: by )|(?:: ))(.*?): (.*?)$')
title_author_re = re.compile(r'^(.*?)(?:(?: by )|(?:: ))(.*?): (.*?)$')
# Separate author from title (where it is specified)
def title_author(self, head):
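
A worked example of the third documented form, 'Title: author: subtitle' (the headline is invented):

import re

title_author_re = re.compile(r'^(.*?)(?:(?: by )|(?:: ))(.*?): (.*?)$')
m = title_author_re.match('Lookalikes: Private Eye: photo feature')
print(m.groups())  # ('Lookalikes', 'Private Eye', 'photo feature')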


@ -38,7 +38,7 @@ class Tweakers(BasicNewsRecipe):
'class': ['sidebar', 'advertorial']
},
{
'class': re.compile('nextPrevious')
'class': re.compile(r'nextPrevious')
},
]
no_stylesheets = True


@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}),
dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):


@ -127,7 +127,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@ -141,7 +141,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
dict(name='div', attrs={'id': re.compile('flyer')}),
dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):


@ -82,28 +82,28 @@ class TimesColonist(BasicNewsRecipe):
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
'''
keep_only_tags = [
dict(name='div', attrs={'class': re.compile('main.content')})]
dict(name='div', attrs={'class': re.compile(r'main.content')})]
def __init__(self, options, log, progress_reporter):
self.remove_tags = [{'class': 'comments'},
{'id': 'photocredit'},
dict(name='div', attrs={
'class': re.compile('top.controls')}),
'class': re.compile(r'top.controls')}),
dict(name='div', attrs={
'class': re.compile('^comments')}),
'class': re.compile(r'^comments')}),
dict(name='div', attrs={
'class': re.compile('social')}),
'class': re.compile(r'social')}),
dict(name='div', attrs={
'class': re.compile('tools')}),
'class': re.compile(r'tools')}),
dict(name='div', attrs={
'class': re.compile('bottom.tools')}),
'class': re.compile(r'bottom.tools')}),
dict(name='div', attrs={
'class': re.compile('window')}),
dict(name='div', attrs={'class': re.compile('related.news.element')})]
'class': re.compile(r'window')}),
dict(name='div', attrs={'class': re.compile(r'related.news.element')})]
print('PROFILE NAME = ' + options.output_profile.short_name)
if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
self.remove_tags.append(
dict(name='div', attrs={'class': re.compile('image-container')}))
dict(name='div', attrs={'class': re.compile(r'image-container')}))
BasicNewsRecipe.__init__(self, options, log, progress_reporter)
def get_cover_url(self):
@ -173,19 +173,19 @@ class TimesColonist(BasicNewsRecipe):
return soup
def preprocess_html(self, soup):
byline = soup.find('p', attrs={'class': re.compile('ancillary')})
byline = soup.find('p', attrs={'class': re.compile(r'ancillary')})
if byline is not None:
authstr = self.tag_to_string(byline, False)
authstr = re.sub('/ *Times Colonist', '/',
authstr = re.sub(r'/ *Times Colonist', '/',
authstr, flags=re.IGNORECASE)
authstr = re.sub('BY */', '', authstr, flags=re.IGNORECASE)
authstr = re.sub(r'BY */', '', authstr, flags=re.IGNORECASE)
newdiv = new_tag(soup, 'div')
newdiv.insert(0, authstr)
newdiv['class'] = 'byline'
byline.replaceWith(newdiv)
for caption in soup.findAll('p', attrs={'class': re.compile('caption')}):
for caption in soup.findAll('p', attrs={'class': re.compile(r'caption')}):
capstr = self.tag_to_string(caption, False)
capstr = re.sub('Photograph by.*$', '',
capstr = re.sub(r'Photograph by.*$', '',
capstr, flags=re.IGNORECASE)
newdiv = new_tag(soup, 'div')
newdiv.insert(0, capstr)
@ -239,13 +239,13 @@ class TimesColonist(BasicNewsRecipe):
except:
return ans
mainsoup = soup.find(
'div', attrs={'class': re.compile('main.content')})
'div', attrs={'class': re.compile(r'main.content')})
article_list = []
for wdiv in mainsoup.findAll('div', attrs={'id': re.compile('featured.story')}):
for wdiv in mainsoup.findAll('div', attrs={'id': re.compile(r'featured.story')}):
for htag in wdiv.findAll('h3'):
self.handle_articles(htag, article_list, sectitle)
for ladiv in mainsoup.findAll(attrs={'class': re.compile('leading.articles')}):
for wdiv in mainsoup.findAll('div', attrs={'class': re.compile('article.row')}):
for ladiv in mainsoup.findAll(attrs={'class': re.compile(r'leading.articles')}):
for wdiv in mainsoup.findAll('div', attrs={'class': re.compile(r'article.row')}):
for htag in wdiv.findAll('h2'):
self.handle_articles(htag, article_list, sectitle)
ans.append((sectitle, article_list))


@ -139,7 +139,7 @@ class ZeitDe(BasicNewsRecipe):
body.insert(0, header)
# Add real img tags for images
for container in soup.findAll(class_=re.compile('__media-container$')):
for container in soup.findAll(class_=re.compile(r'__media-container$')):
img = container.find('noscript')
if img is not None:
img.name = 'div'


@ -200,11 +200,11 @@ class ZeitEPUBAbo(BasicNewsRecipe):
# browser.follow_link(abolink)
# find page for latest issue
latestlink = browser.find_link(text_regex=re.compile(
'.*ZUR AKTUELLEN AUSGABE.*'))
r'.*ZUR AKTUELLEN AUSGABE.*'))
browser.follow_link(latestlink)
# now find the correct file, we will still use the ePub file
epublink = browser.find_link(text_regex=re.compile(
'.*EPUB F.*R E-READER LADEN.*')) # change from '.*EPUB FÜR E-READER LADEN.*' in May 2017
r'.*EPUB F.*R E-READER LADEN.*')) # change from '.*EPUB FÜR E-READER LADEN.*' in May 2017
response = browser.follow_link(epublink)
self.report_progress(1, _('next step'))
@ -266,11 +266,11 @@ class ZeitEPUBAbo(BasicNewsRecipe):
# browser.follow_link(abolink)
# find page for latest issue
latestlink = browser.find_link(text_regex=re.compile(
'.*ZUR AKTUELLEN AUSGABE.*'))
r'.*ZUR AKTUELLEN AUSGABE.*'))
browser.follow_link(latestlink)
# actual cover search
pdflink = browser.find_link(text_regex=re.compile(
'.*GESAMT-PDF LADEN.*'))
r'.*GESAMT-PDF LADEN.*'))
cover_url = urlparse(pdflink.base_url)[0] + '://' + urlparse(pdflink.base_url)[1] + '' + (
urlparse(pdflink.url)[2]).replace('ePaper_', '').replace('.pdf', '_001.pdf')
self.log.warning('PDF link found:')


@ -34,6 +34,7 @@ select = [
# preview rules
'RUF051', 'RUF056', # useless dict operation
'RUF055', # unnecessary regex
'RUF039', # always use raw-string for regex
]
[lint.per-file-ignores]
@ -46,7 +47,7 @@ select = [
"src/calibre/gui2/store/stores/*" = ['UP']
"src/calibre/gui2/tts/manager.py" = ['UP037']
"src/calibre/utils/copy_files.py" = ['UP037']
"src/calibre/utils/smartypants.py" = ['RUF055']
"src/calibre/utils/smartypants.py" = ['RUF039', 'RUF055']
"src/qt/*.py" = ['I']
"src/qt/*.pyi" = ['I']


@ -17,7 +17,7 @@ import time
from contextlib import contextmanager
from functools import lru_cache
iswindows = re.search('win(32|64)', sys.platform)
iswindows = re.search(r'win(32|64)', sys.platform)
ismacos = 'darwin' in sys.platform
isfreebsd = 'freebsd' in sys.platform
isnetbsd = 'netbsd' in sys.platform


@ -657,7 +657,7 @@ class Parser(SearchQueryParser): # {{{
if location == 'template':
try:
template, sep, query = regex.split('#@#:([tdnb]):', query, flags=regex.IGNORECASE)
template, sep, query = regex.split(r'#@#:([tdnb]):', query, flags=regex.IGNORECASE)
if sep:
sep = sep.lower()
else:


@ -34,7 +34,7 @@ class CYBOOK(USBMS):
VENDOR_NAME = 'BOOKEEN'
WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_(OPUS|GEN3)__-FD')
WINDOWS_CARD_A_MEM = re.compile('CYBOOK_(OPUS|GEN3)__-SD')
WINDOWS_CARD_A_MEM = re.compile(r'CYBOOK_(OPUS|GEN3)__-SD')
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/Cybook')
EBOOK_DIR_MAIN = 'eBooks'
@ -72,7 +72,7 @@ class ORIZON(CYBOOK):
VENDOR_NAME = ['BOOKEEN', 'LINUX']
WINDOWS_MAIN_MEM = re.compile(r'(CYBOOK_ORIZON__-FD)|(FILE-STOR_GADGET)')
WINDOWS_CARD_A_MEM = re.compile('(CYBOOK_ORIZON__-SD)|(FILE-STOR_GADGET)')
WINDOWS_CARD_A_MEM = re.compile(r'(CYBOOK_ORIZON__-SD)|(FILE-STOR_GADGET)')
EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions'


@ -58,11 +58,11 @@ def build_template_regexp(template):
try:
template = template.rpartition('/')[2]
return re.compile(re.sub('{([^}]*)}', f, template) + r'([_\d]*$)')
return re.compile(re.sub(r'{([^}]*)}', f, template) + r'([_\d]*$)')
except:
prints('Failed to parse template: %r'%template)
template = '{title} - {authors}'
return re.compile(re.sub('{([^}]*)}', f, template) + r'([_\d]*$)')
return re.compile(re.sub(r'{([^}]*)}', f, template) + r'([_\d]*$)')
def create_upload_path(mdata, fname, template, sanitize,


@ -239,7 +239,7 @@ def generate_masthead(title, output_path=None, width=600, height=60):
def escape_xpath_attr(value):
if '"' in value:
if "'" in value:
parts = re.split('("+)', value)
parts = re.split(r'("+)', value)
ans = []
for x in parts:
if x:


@ -42,7 +42,7 @@ def _metadata_from_table(soup, searchfor):
# on the home page. cue some nasty special-case hacks...
if re.match(r'^\s*'+searchfor+r'\s*$', td.decode_contents(), flags=re.I):
meta = _detag(td.findNextSibling('td'))
return re.sub('^:', '', meta).strip()
return re.sub(r'^:', '', meta).strip()
else:
meta = _detag(td)
return re.sub(r'^[^:]+:', '', meta).strip()
@ -89,7 +89,7 @@ def _get_comments(soup):
def _get_cover(soup, rdr):
ans = None
try:
ans = soup.find('img', alt=re.compile('cover', flags=re.I))['src']
ans = soup.find('img', alt=re.compile(r'cover', flags=re.I))['src']
except TypeError:
# meeehh, no handy alt-tag goodness, try some hackery
# the basic idea behind this is that in general, the cover image


@ -16,7 +16,7 @@ XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
_span_pat = re.compile(r'<span.*?</span>', re.DOTALL|re.IGNORECASE)
LIGATURES = {
# 'Æ': 'AE',
@ -92,7 +92,7 @@ class DocAnalysis:
elif format == 'pdf':
linere = re.compile(r'(?<=<br>)(?!\s*<br>).*?(?=<br>)', re.DOTALL)
elif format == 'spanned_html':
linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
linere = re.compile(r'(?<=<span).*?(?=</span>)', re.DOTALL)
elif format == 'txt':
linere = re.compile('.*?\n')
self.lines = linere.findall(raw)
@ -430,16 +430,16 @@ def book_designer_rules():
if ans is None:
ans = book_designer_rules.ans = [
# HR
(re.compile('<hr>', re.IGNORECASE),
(re.compile(r'<hr>', re.IGNORECASE),
lambda match : '<span style="page-break-after:always"> </span>'),
# Create header tags
(re.compile(r'<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile(r'<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
(re.compile(r'<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
(re.compile(r'<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
]
return ans
@ -458,7 +458,7 @@ class HTMLPreProcessor:
re.IGNORECASE).search(src) is not None
def is_book_designer(self, raw):
return re.search('<H2[^><]*id=BookTitle', raw) is not None
return re.search(r'<H2[^><]*id=BookTitle', raw) is not None
def is_pdftohtml(self, src):
return "<!-- created by calibre's pdftohtml -->" in src[:1000]


@ -27,7 +27,7 @@ class HeuristicProcessor:
self.chapters_with_title = 0
self.blanks_deleted = False
self.blanks_between_paragraphs = False
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
self.linereg = re.compile(r'(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|whitespace)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}(?!\s*<h\d)', re.IGNORECASE)
@ -108,7 +108,7 @@ class HeuristicProcessor:
inspect. Percent is the minimum percent of line endings which should
be marked up to return true.
'''
htm_end_ere = re.compile('</(p|div)>', re.DOTALL)
htm_end_ere = re.compile(r'</(p|div)>', re.DOTALL)
line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
htm_end = htm_end_ere.findall(raw)
line_end = line_end_ere.findall(raw)
@ -209,7 +209,7 @@ class HeuristicProcessor:
typical_chapters = 15000.
self.min_chapters = int(ceil(wordcount / typical_chapters))
self.log.debug('minimum chapters required are: '+str(self.min_chapters))
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
heading = re.compile(r'<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
self.log.debug('found ' + str(self.html_preprocess_sections) + ' pre-existing headings')
@ -299,7 +299,7 @@ class HeuristicProcessor:
break
full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
if n_lookahead_req:
n_lookahead = re.sub('(ou|in|cha)', 'lookahead_', full_chapter_line)
n_lookahead = re.sub(r'(ou|in|cha)', 'lookahead_', full_chapter_line)
if not analyze:
self.log.debug('Marked ' + str(self.html_preprocess_sections) + ' headings, ' + log_message)
@ -442,7 +442,7 @@ class HeuristicProcessor:
# Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\\w+>', '', html)
# Re-open self closing paragraph tags
html = re.sub('<p[^>/]*/>', '<p> </p>', html)
html = re.sub(r'<p[^>/]*/>', '<p> </p>', html)
# Get rid of empty span, bold, font, em, & italics tags
fmt_tags = 'font|[ibu]|em|strong'
open_fmt_pat, close_fmt_pat = fr'<(?:{fmt_tags})(?:\s[^>]*)?>', f'</(?:{fmt_tags})>'
@ -462,8 +462,8 @@ class HeuristicProcessor:
determines the type of html line ending used most commonly in a document
use before calling docanalysis functions
'''
paras_reg = re.compile('<p[^>]*>', re.IGNORECASE)
spans_reg = re.compile('<span[^>]*>', re.IGNORECASE)
paras_reg = re.compile(r'<p[^>]*>', re.IGNORECASE)
spans_reg = re.compile(r'<span[^>]*>', re.IGNORECASE)
paras = len(paras_reg.findall(html))
spans = len(spans_reg.findall(html))
if spans > 1:
@ -557,8 +557,8 @@ class HeuristicProcessor:
def detect_soft_breaks(self, html):
line = '(?P<initline>'+self.line_open+'\\s*(?P<init_content>.*?)'+self.line_close+')'
line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+ \
'\\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
line_two = '(?P<line_two>'+re.sub(r'(ou|in|cha)', 'linetwo_', self.line_open)+ \
'\\s*(?P<line_two_content>.*?)'+re.sub(r'(ou|in|cha)', 'linetwo_', self.line_close)+')'
div_break_candidate_pattern = line+'\\s*<div[^>]*>\\s*</div>\\s*'+line_two
div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE)
@ -596,8 +596,8 @@ class HeuristicProcessor:
All other html is converted to text.
'''
hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em; page-break-before:avoid">'
if re.findall('(<|>)', replacement_break):
if re.match('^<hr', replacement_break):
if re.findall(r'(<|>)', replacement_break):
if re.match(r'^<hr', replacement_break):
if replacement_break.find('width') != -1:
try:
width = int(re.sub('.*?width(:|=)(?P<wnum>\\d+).*', '\\g<wnum>', replacement_break))
@ -608,11 +608,11 @@ class HeuristicProcessor:
else:
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) // 2
hr_open = re.sub('45', str(divpercent), hr_open)
hr_open = re.sub(r'45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
elif re.match('^<img', replacement_break):
elif re.match(r'^<img', replacement_break):
scene_break = self.scene_break_open+replacement_break+'</p>'
else:
from calibre.utils.html2text import html2text
@ -638,7 +638,7 @@ class HeuristicProcessor:
empty_paragraph = '\n<p> </p>\n'
self.in_blockquote = False
self.previous_was_paragraph = False
html = re.sub('</?a[^>]*>', '', html)
html = re.sub(r'</?a[^>]*>', '', html)
def convert_styles(match):
# print('raw styles are: '+match.group('styles'))


@ -91,7 +91,7 @@ class HTMLFile:
HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
HTML_PAT_BIN = re.compile(br'<\s*html', re.IGNORECASE)
TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
TITLE_PAT = re.compile(r'<title>([^<>]+)</title>', re.IGNORECASE)
LINK_PAT = re.compile(
r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
re.DOTALL|re.IGNORECASE)


@ -269,7 +269,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
tag = 'div'
# Add page-break-brefore: always because renders typically treat a new file (we're merging files)
# as a page break and remove all other page break types that might be set.
style_a = 'page-break-before: always; %s' % re.sub('page-break-[^:]+:[^;]+;?', '', style_a)
style_a = 'page-break-before: always; %s' % re.sub(r'page-break-[^:]+:[^;]+;?', '', style_a)
# Remove unnecessary spaces.
style_a = re.sub(r'\s{2,}', ' ', style_a).strip()
tags.append(tag)


@ -34,8 +34,8 @@ class Hyphenator:
def _insert_pattern(self, pattern):
# Convert a pattern like 'a1bc3d4' into a string of chars 'abcd'
# and a list of points [ 1, 0, 3, 4 ].
chars = re.sub('[0-9]', '', pattern)
points = [int(d or 0) for d in re.split('[.a-z]', pattern)]
chars = re.sub(r'[0-9]', '', pattern)
points = [int(d or 0) for d in re.split(r'[.a-z]', pattern)]
# Insert the pattern into the tree. Each character finds a dict
# another level down in the tree, and leaf nodes have the list of


@ -163,7 +163,7 @@ class HTMLConverter:
# Fix Book Designer markup
BOOK_DESIGNER = [
# HR
(re.compile('<hr>', re.IGNORECASE),
(re.compile(r'<hr>', re.IGNORECASE),
lambda match : '<span style="page-break-after:always"> </span>'),
# Create header tags
(re.compile(r'<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
@ -279,7 +279,7 @@ class HTMLConverter:
if isinstance(src, bytes):
src = src.decode('utf-8', 'replace')
match = self.PAGE_BREAK_PAT.search(src)
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
if match and not re.match(r'avoid', match.group(1), re.IGNORECASE):
self.page_break_found = True
ncss, npcss = self.parse_css(src)
if ncss:
@ -324,10 +324,10 @@ class HTMLConverter:
def is_baen(self, soup):
return bool(soup.find('meta', attrs={'name':'Publisher',
'content':re.compile('Baen', re.IGNORECASE)}))
'content':re.compile(r'Baen', re.IGNORECASE)}))
def is_book_designer(self, raw):
return bool(re.search('<H2[^><]*id=BookTitle', raw))
return bool(re.search(r'<H2[^><]*id=BookTitle', raw))
def preprocess(self, raw):
nmassage = []
@ -1152,7 +1152,7 @@ class HTMLConverter:
def font_weight(val):
ans = 0
m = re.search('([0-9]+)', val)
m = re.search(r'([0-9]+)', val)
if m:
ans = int(m.group(1))
elif val.find('bold') >= 0 or val.find('strong') >= 0:
@ -1544,7 +1544,7 @@ class HTMLConverter:
with open(path, 'rb') as f:
src = f.read().decode('utf-8', 'replace')
match = self.PAGE_BREAK_PAT.search(src)
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
if match and not re.match(r'avoid', match.group(1), re.IGNORECASE):
self.page_break_found = True
ncss, npcss = self.parse_css(src)
except OSError:
@ -1869,11 +1869,11 @@ def process_file(path, options, logger):
header.append(fheader + ' ')
book, fonts = Book(options, logger, header=header, **args)
le = re.compile(options.link_exclude) if options.link_exclude else \
re.compile('$')
re.compile(r'$')
pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
re.compile('$')
re.compile(r'$')
fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
re.compile('$')
re.compile(r'$')
cq = options.chapter_attr.split(',')
if len(cq) < 3:
raise ValueError('The --chapter-attr setting must have 2 commas.')


@ -213,7 +213,7 @@ class Row:
def __init__(self, conv, row, css, colpad):
self.cells = []
self.colpad = colpad
cells = row.findAll(re.compile('td|th', re.IGNORECASE))
cells = row.findAll(re.compile(r'td|th', re.IGNORECASE))
self.targets = []
for cell in cells:
ccss = conv.tag_css(cell, css)[0]


@ -172,7 +172,7 @@ def get_title_sort_pat(lang=None):
except:
ans = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
else:
ans = re.compile('^$') # matches only the empty string
ans = re.compile(r'^$') # matches only the empty string
_title_pats[lang] = ans
return ans


@ -139,7 +139,7 @@ def metadata_from_filename(name, pat=None, fallback_pat=None):
try:
pat = regex.compile(prefs.get('filename_pattern'), flags=regex.UNICODE | regex.VERSION0 | regex.FULLCASE)
except Exception:
pat = regex.compile('(?P<title>.+) - (?P<author>[^_]+)', flags=regex.UNICODE | regex.VERSION0 | regex.FULLCASE)
pat = regex.compile(r'(?P<title>.+) - (?P<author>[^_]+)', flags=regex.UNICODE | regex.VERSION0 | regex.FULLCASE)
name = name.replace('_', ' ')
match = pat.search(name)


@ -59,4 +59,4 @@ def set_metadata(stream, mi):
MetadataWriter(stream, mi)
stream.seek(0)
stream.write(re.sub('[^-A-Za-z0-9 ]+', '_', mi.title).ljust(31, '\x00')[:31].encode('ascii', 'replace') + b'\x00')
stream.write(re.sub(r'[^-A-Za-z0-9 ]+', '_', mi.title).ljust(31, '\x00')[:31].encode('ascii', 'replace') + b'\x00')


@ -365,7 +365,7 @@ class Worker(Thread): # Get details {{{
r'([0-9.,]+) ?(out of|von|van|su|étoiles sur|つ星のうち|de un máximo de|de|av) '
r'([\d\.]+)( (stars|Sternen|stelle|estrellas|estrelas|sterren|stjärnor)){0,1}'
)
self.ratings_pat_cn = re.compile('([0-9.]+) 颗星,最多 5 颗星')
self.ratings_pat_cn = re.compile(r'([0-9.]+) 颗星,最多 5 颗星')
self.ratings_pat_jp = re.compile(r'\d+つ星のうち([\d\.]+)')
lm = {


@ -165,7 +165,7 @@ def wayback_url_processor(url):
if url.startswith('/'):
# Use original URL instead of absolutizing to wayback URL as wayback is
# slow
m = re.search('https?:', url)
m = re.search(r'https?:', url)
if m is None:
url = 'https://web.archive.org' + url
else:


@ -380,7 +380,7 @@ class MobiReader:
self.processed_html = re.sub(
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', r'\g<blockquote>'+r'\g<para>', self.processed_html)
bods = htmls = 0
for x in re.finditer('</body>|</html>', self.processed_html):
for x in re.finditer(r'</body>|</html>', self.processed_html):
if x == '</body>':
bods +=1
else:


@ -155,7 +155,7 @@ def hfix(name, raw):
return raw
CLI_HELP = {x:hfix(x, re.sub('<.*?>', '', y)) for x, y in iteritems(HELP)}
CLI_HELP = {x:hfix(x, re.sub(r'<.*?>', '', y)) for x, y in iteritems(HELP)}
# }}}


@ -36,7 +36,7 @@ class Patterns:
# French words with prefixes are reduced to the stem word, so that the
# words appear only once in the word list
self.fr_elision_pat = regex.compile(
"^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)[']", flags=regex.UNICODE | regex.VERSION1 | regex.IGNORECASE)
r"^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)[']", flags=regex.UNICODE | regex.VERSION1 | regex.IGNORECASE)
def patterns():


@ -102,7 +102,7 @@ class SVGRasterizer:
if view_box is not None:
try:
box = [float(x) for x in filter(None, re.split('[, ]', view_box))]
box = [float(x) for x in filter(None, re.split(r'[, ]', view_box))]
sizes = [box[2]-box[0], box[3] - box[1]]
except (TypeError, ValueError, IndexError):
logger.warn('SVG image has invalid viewBox="%s", ignoring the viewBox' % view_box)


@ -152,7 +152,7 @@ def flip_image(img, flip):
def flip_images(raw):
for match in re.finditer('<IMG[^>]+/?>', raw, flags=re.I):
for match in re.finditer(r'<IMG[^>]+/?>', raw, flags=re.I):
img = match.group()
m = re.search(r'class="(x|y|xy)flip"', img)
if m is None:
@ -174,5 +174,5 @@ def flip_images(raw):
counter += 1
return m.group(1).rstrip('/') + f' alt="Image {counter}"/>'
raw = re.sub('(<IMG[^>]+)/?>', add_alt, raw, flags=re.I)
raw = re.sub(r'(<IMG[^>]+)/?>', add_alt, raw, flags=re.I)
return raw


@ -121,7 +121,7 @@ class Font:
self.metrics, self.compress = metrics, compress
self.is_otf = self.metrics.is_otf
self.subset_tag = str(
re.sub('.', lambda m: codepoint_to_chr(int(m.group())+ord('A')), oct(num).replace('o', '')
re.sub(r'.', lambda m: codepoint_to_chr(int(m.group())+ord('A')), oct(num).replace('o', '')
)).rjust(6, 'A')
self.font_stream = FontStream(metrics.is_otf, compress=compress)
try:


@ -199,11 +199,11 @@ class PMLMLizer:
text = re.sub('[^\x00-\x7f]', lambda x: unipmlcode(x.group()), text)
# Remove excess spaces at beginning and end of lines
text = re.sub('(?m)^[ ]+', '', text)
text = re.sub('(?m)[ ]+$', '', text)
text = re.sub(r'(?m)^[ ]+', '', text)
text = re.sub(r'(?m)[ ]+$', '', text)
# Remove excessive spaces
text = re.sub('[ ]{2,}', ' ', text)
text = re.sub(r'[ ]{2,}', ' ', text)
# Condense excessive \c empty line sequences.
text = re.sub(r'(\\c\s*\\c\s*){2,}', r'\\c \n\\c\n', text)
@ -213,7 +213,7 @@ class PMLMLizer:
if self.opts.remove_paragraph_spacing:
text = re.sub('\n{2,}', '\n', text)
# Only indent lines that don't have special formatting
text = re.sub('(?imu)^(?P<text>.+)$', lambda mo: mo.group('text')
text = re.sub(r'(?imu)^(?P<text>.+)$', lambda mo: mo.group('text')
if re.search(r'\\[XxCmrctTp]', mo.group('text')) else ' %s' % mo.group('text'), text)
else:
text = re.sub('\n{3,}', '\n\n', text)


@ -19,11 +19,11 @@ def tounicode(tree_or_node, **kwargs):
REGEXES = {
'unlikelyCandidatesRe': re.compile('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter',re.I), # noqa: E501
'okMaybeItsACandidateRe': re.compile('and|article|body|column|main|shadow',re.I),
'positiveRe': re.compile('article|body|content|entry|hentry|main|page|pagination|post|text|blog|story',re.I),
'negativeRe': re.compile('combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget',re.I), # noqa: E501
'divToPElementsRe': re.compile('<(a|blockquote|dl|div|img|ol|p|pre|table|ul)',re.I),
'unlikelyCandidatesRe': re.compile(r'combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter',re.I), # noqa: E501
'okMaybeItsACandidateRe': re.compile(r'and|article|body|column|main|shadow',re.I),
'positiveRe': re.compile(r'article|body|content|entry|hentry|main|page|pagination|post|text|blog|story',re.I),
'negativeRe': re.compile(r'combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget',re.I), # noqa: E501
'divToPElementsRe': re.compile(r'<(a|blockquote|dl|div|img|ol|p|pre|table|ul)',re.I),
# 'replaceBrsRe': re.compile('(<br[^>]*>[ \n\r\t]*){2,}',re.I),
# 'replaceFontsRe': re.compile('<(\/?)font[^>]*>',re.I),
# 'trimRe': re.compile('^\s+|\s+$/'),


@ -121,7 +121,7 @@ class RTFMLizer:
self.log.debug('Converting %s to RTF markup...' % item.href)
# Removing comments is needed as comments with -- inside them can
# cause fromstring() to fail
content = re.sub('<!--.*?-->', '', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL)
content = re.sub(r'<!--.*?-->', '', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL)
content = self.remove_newlines(content)
content = self.remove_tabs(content)
content = safe_xml_fromstring(content)
@ -198,7 +198,7 @@ class RTFMLizer:
text = re.sub('%s{3,}' % os.linesep, f'{os.linesep}{os.linesep}', text)
# Remove excessive spaces
text = re.sub('[ ]{2,}', ' ', text)
text = re.sub(r'[ ]{2,}', ' ', text)
text = re.sub('\t{2,}', '\t', text)
text = text.replace('\t ', '\t')


@ -652,7 +652,7 @@ class ProcessTokens:
return f'cw<{pre}<{token}<nu<{type}\n'
def __language_func(self, pre, token, num):
lang_name = self.__language_dict.get(int(re.search('[0-9]+', num).group()))
lang_name = self.__language_dict.get(int(re.search(r'[0-9]+', num).group()))
if not lang_name:
lang_name = 'not defined'
if self.__run_level > 3:


@ -165,13 +165,13 @@ class SNBMLizer:
text = re.sub('\n[ ]+\n', '\n\n', text)
if self.opts.remove_paragraph_spacing:
text = re.sub('\n{2,}', '\n', text)
text = re.sub('(?imu)^(?=.)', '\t', text)
text = re.sub(r'(?imu)^(?=.)', '\t', text)
else:
text = re.sub('\n{3,}', '\n\n', text)
# Replace spaces at the beginning and end of lines
text = re.sub('(?imu)^[ ]+', '', text)
text = re.sub('(?imu)[ ]+$', '', text)
text = re.sub(r'(?imu)^[ ]+', '', text)
text = re.sub(r'(?imu)[ ]+$', '', text)
if self.opts.snb_max_line_length:
max_length = self.opts.snb_max_line_length


@ -6,117 +6,117 @@ import re
def unsmarten(txt):
txt = re.sub('&#162;|&cent;|¢', r'{c\}', txt) # cent
txt = re.sub('&#163;|&pound;|£', r'{L-}', txt) # pound
txt = re.sub('&#165;|&yen;|¥', r'{Y=}', txt) # yen
txt = re.sub('&#169;|&copy;|©', r'{(c)}', txt) # copyright
txt = re.sub('&#174;|&reg;|®', r'{(r)}', txt) # registered
txt = re.sub('&#188;|&frac14;|¼', r'{1/4}', txt) # quarter
txt = re.sub('&#189;|&frac12;|½', r'{1/2}', txt) # half
txt = re.sub('&#190;|&frac34;|¾', r'{3/4}', txt) # three-quarter
txt = re.sub('&#192;|&Agrave;|À', r'{A`)}', txt) # A-grave
txt = re.sub('&#193;|&Aacute;|Á', r"{A'}", txt) # A-acute
txt = re.sub('&#194;|&Acirc;|Â', r'{A^}', txt) # A-circumflex
txt = re.sub('&#195;|&Atilde;|Ã', r'{A~}', txt) # A-tilde
txt = re.sub('&#196;|&Auml;|Ä', r'{A"}', txt) # A-umlaut
txt = re.sub('&#197;|&Aring;|Å', r'{Ao}', txt) # A-ring
txt = re.sub('&#198;|&AElig;|Æ', r'{AE}', txt) # AE
txt = re.sub('&#199;|&Ccedil;|Ç', r'{C,}', txt) # C-cedilla
txt = re.sub('&#200;|&Egrave;|È', r'{E`}', txt) # E-grave
txt = re.sub('&#201;|&Eacute;|É', r"{E'}", txt) # E-acute
txt = re.sub('&#202;|&Ecirc;|Ê', r'{E^}', txt) # E-circumflex
txt = re.sub('&#203;|&Euml;|Ë', r'{E"}', txt) # E-umlaut
txt = re.sub('&#204;|&Igrave;|Ì', r'{I`}', txt) # I-grave
txt = re.sub('&#205;|&Iacute;|Í', r"{I'}", txt) # I-acute
txt = re.sub('&#206;|&Icirc;|Î', r'{I^}', txt) # I-circumflex
txt = re.sub('&#207;|&Iuml;|Ï', r'{I"}', txt) # I-umlaut
txt = re.sub('&#208;|&ETH;|Ð', r'{D-}', txt) # ETH
txt = re.sub('&#209;|&Ntilde;|Ñ', r'{N~}', txt) # N-tilde
txt = re.sub('&#210;|&Ograve;|Ò', r'{O`}', txt) # O-grave
txt = re.sub('&#211;|&Oacute;|Ó', r"{O'}", txt) # O-acute
txt = re.sub('&#212;|&Ocirc;|Ô', r'{O^}', txt) # O-circumflex
txt = re.sub('&#213;|&Otilde;|Õ', r'{O~}', txt) # O-tilde
txt = re.sub('&#214;|&Ouml;|Ö', r'{O"}', txt) # O-umlaut
txt = re.sub('&#215;|&times;|×', r'{x}', txt) # dimension
txt = re.sub('&#216;|&Oslash;|Ø', r'{O/}', txt) # O-slash
txt = re.sub('&#217;|&Ugrave;|Ù', r'{U`}', txt) # U-grave
txt = re.sub('&#218;|&Uacute;|Ú', r"{U'}", txt) # U-acute
txt = re.sub('&#219;|&Ucirc;|Û', r'{U^}', txt) # U-circumflex
txt = re.sub('&#220;|&Uuml;|Ü', r'{U"}', txt) # U-umlaut
    txt = re.sub('&#221;|&Yacute;|Ý', r"{Y'}", txt)   # Y-acute
txt = re.sub('&#223;|&szlig;|ß', r'{sz}', txt) # sharp-s
txt = re.sub('&#224;|&agrave;|à', r'{a`}', txt) # a-grave
txt = re.sub('&#225;|&aacute;|á', r"{a'}", txt) # a-acute
txt = re.sub('&#226;|&acirc;|â', r'{a^}', txt) # a-circumflex
txt = re.sub('&#227;|&atilde;|ã', r'{a~}', txt) # a-tilde
txt = re.sub('&#228;|&auml;|ä', r'{a"}', txt) # a-umlaut
txt = re.sub('&#229;|&aring;|å', r'{ao}', txt) # a-ring
txt = re.sub('&#230;|&aelig;|æ', r'{ae}', txt) # ae
txt = re.sub('&#231;|&ccedil;|ç', r'{c,}', txt) # c-cedilla
txt = re.sub('&#232;|&egrave;|è', r'{e`}', txt) # e-grave
txt = re.sub('&#233;|&eacute;|é', r"{e'}", txt) # e-acute
txt = re.sub('&#234;|&ecirc;|ê', r'{e^}', txt) # e-circumflex
txt = re.sub('&#235;|&euml;|ë', r'{e"}', txt) # e-umlaut
txt = re.sub('&#236;|&igrave;|ì', r'{i`}', txt) # i-grave
txt = re.sub('&#237;|&iacute;|í', r"{i'}", txt) # i-acute
txt = re.sub('&#238;|&icirc;|î', r'{i^}', txt) # i-circumflex
txt = re.sub('&#239;|&iuml;|ï', r'{i"}', txt) # i-umlaut
txt = re.sub('&#240;|&eth;|ð', r'{d-}', txt) # eth
txt = re.sub('&#241;|&ntilde;|ñ', r'{n~}', txt) # n-tilde
txt = re.sub('&#242;|&ograve;|ò', r'{o`}', txt) # o-grave
txt = re.sub('&#243;|&oacute;|ó', r"{o'}", txt) # o-acute
txt = re.sub('&#244;|&ocirc;|ô', r'{o^}', txt) # o-circumflex
txt = re.sub('&#245;|&otilde;|õ', r'{o~}', txt) # o-tilde
txt = re.sub('&#246;|&ouml;|ö', r'{o"}', txt) # o-umlaut
txt = re.sub('&#248;|&oslash;|ø', r'{o/}', txt) # o-stroke
txt = re.sub('&#249;|&ugrave;|ù', r'{u`}', txt) # u-grave
txt = re.sub('&#250;|&uacute;|ú', r"{u'}", txt) # u-acute
txt = re.sub('&#251;|&ucirc;|û', r'{u^}', txt) # u-circumflex
txt = re.sub('&#252;|&uuml;|ü', r'{u"}', txt) # u-umlaut
txt = re.sub('&#253;|&yacute;|ý', r"{y'}", txt) # y-acute
txt = re.sub('&#255;|&yuml;|ÿ', r'{y"}', txt) # y-umlaut
txt = re.sub(r'&#162;|&cent;|¢', r'{c\}', txt) # cent
txt = re.sub(r'&#163;|&pound;|£', r'{L-}', txt) # pound
txt = re.sub(r'&#165;|&yen;|¥', r'{Y=}', txt) # yen
txt = re.sub(r'&#169;|&copy;|©', r'{(c)}', txt) # copyright
txt = re.sub(r'&#174;|&reg;|®', r'{(r)}', txt) # registered
txt = re.sub(r'&#188;|&frac14;|¼', r'{1/4}', txt) # quarter
txt = re.sub(r'&#189;|&frac12;|½', r'{1/2}', txt) # half
txt = re.sub(r'&#190;|&frac34;|¾', r'{3/4}', txt) # three-quarter
    txt = re.sub(r'&#192;|&Agrave;|À', r'{A`}', txt)   # A-grave
txt = re.sub(r'&#193;|&Aacute;|Á', r"{A'}", txt) # A-acute
txt = re.sub(r'&#194;|&Acirc;|Â', r'{A^}', txt) # A-circumflex
txt = re.sub(r'&#195;|&Atilde;|Ã', r'{A~}', txt) # A-tilde
txt = re.sub(r'&#196;|&Auml;|Ä', r'{A"}', txt) # A-umlaut
txt = re.sub(r'&#197;|&Aring;|Å', r'{Ao}', txt) # A-ring
txt = re.sub(r'&#198;|&AElig;|Æ', r'{AE}', txt) # AE
txt = re.sub(r'&#199;|&Ccedil;|Ç', r'{C,}', txt) # C-cedilla
txt = re.sub(r'&#200;|&Egrave;|È', r'{E`}', txt) # E-grave
txt = re.sub(r'&#201;|&Eacute;|É', r"{E'}", txt) # E-acute
txt = re.sub(r'&#202;|&Ecirc;|Ê', r'{E^}', txt) # E-circumflex
txt = re.sub(r'&#203;|&Euml;|Ë', r'{E"}', txt) # E-umlaut
txt = re.sub(r'&#204;|&Igrave;|Ì', r'{I`}', txt) # I-grave
txt = re.sub(r'&#205;|&Iacute;|Í', r"{I'}", txt) # I-acute
txt = re.sub(r'&#206;|&Icirc;|Î', r'{I^}', txt) # I-circumflex
txt = re.sub(r'&#207;|&Iuml;|Ï', r'{I"}', txt) # I-umlaut
txt = re.sub(r'&#208;|&ETH;|Ð', r'{D-}', txt) # ETH
txt = re.sub(r'&#209;|&Ntilde;|Ñ', r'{N~}', txt) # N-tilde
txt = re.sub(r'&#210;|&Ograve;|Ò', r'{O`}', txt) # O-grave
txt = re.sub(r'&#211;|&Oacute;|Ó', r"{O'}", txt) # O-acute
txt = re.sub(r'&#212;|&Ocirc;|Ô', r'{O^}', txt) # O-circumflex
txt = re.sub(r'&#213;|&Otilde;|Õ', r'{O~}', txt) # O-tilde
txt = re.sub(r'&#214;|&Ouml;|Ö', r'{O"}', txt) # O-umlaut
txt = re.sub(r'&#215;|&times;|×', r'{x}', txt) # dimension
txt = re.sub(r'&#216;|&Oslash;|Ø', r'{O/}', txt) # O-slash
txt = re.sub(r'&#217;|&Ugrave;|Ù', r'{U`}', txt) # U-grave
txt = re.sub(r'&#218;|&Uacute;|Ú', r"{U'}", txt) # U-acute
txt = re.sub(r'&#219;|&Ucirc;|Û', r'{U^}', txt) # U-circumflex
txt = re.sub(r'&#220;|&Uuml;|Ü', r'{U"}', txt) # U-umlaut
    txt = re.sub(r'&#221;|&Yacute;|Ý', r"{Y'}", txt)   # Y-acute
txt = re.sub(r'&#223;|&szlig;|ß', r'{sz}', txt) # sharp-s
txt = re.sub(r'&#224;|&agrave;|à', r'{a`}', txt) # a-grave
txt = re.sub(r'&#225;|&aacute;|á', r"{a'}", txt) # a-acute
txt = re.sub(r'&#226;|&acirc;|â', r'{a^}', txt) # a-circumflex
txt = re.sub(r'&#227;|&atilde;|ã', r'{a~}', txt) # a-tilde
txt = re.sub(r'&#228;|&auml;|ä', r'{a"}', txt) # a-umlaut
txt = re.sub(r'&#229;|&aring;|å', r'{ao}', txt) # a-ring
txt = re.sub(r'&#230;|&aelig;|æ', r'{ae}', txt) # ae
txt = re.sub(r'&#231;|&ccedil;|ç', r'{c,}', txt) # c-cedilla
txt = re.sub(r'&#232;|&egrave;|è', r'{e`}', txt) # e-grave
txt = re.sub(r'&#233;|&eacute;|é', r"{e'}", txt) # e-acute
txt = re.sub(r'&#234;|&ecirc;|ê', r'{e^}', txt) # e-circumflex
txt = re.sub(r'&#235;|&euml;|ë', r'{e"}', txt) # e-umlaut
txt = re.sub(r'&#236;|&igrave;|ì', r'{i`}', txt) # i-grave
txt = re.sub(r'&#237;|&iacute;|í', r"{i'}", txt) # i-acute
txt = re.sub(r'&#238;|&icirc;|î', r'{i^}', txt) # i-circumflex
txt = re.sub(r'&#239;|&iuml;|ï', r'{i"}', txt) # i-umlaut
txt = re.sub(r'&#240;|&eth;|ð', r'{d-}', txt) # eth
txt = re.sub(r'&#241;|&ntilde;|ñ', r'{n~}', txt) # n-tilde
txt = re.sub(r'&#242;|&ograve;|ò', r'{o`}', txt) # o-grave
txt = re.sub(r'&#243;|&oacute;|ó', r"{o'}", txt) # o-acute
txt = re.sub(r'&#244;|&ocirc;|ô', r'{o^}', txt) # o-circumflex
txt = re.sub(r'&#245;|&otilde;|õ', r'{o~}', txt) # o-tilde
txt = re.sub(r'&#246;|&ouml;|ö', r'{o"}', txt) # o-umlaut
txt = re.sub(r'&#248;|&oslash;|ø', r'{o/}', txt) # o-stroke
txt = re.sub(r'&#249;|&ugrave;|ù', r'{u`}', txt) # u-grave
txt = re.sub(r'&#250;|&uacute;|ú', r"{u'}", txt) # u-acute
txt = re.sub(r'&#251;|&ucirc;|û', r'{u^}', txt) # u-circumflex
txt = re.sub(r'&#252;|&uuml;|ü', r'{u"}', txt) # u-umlaut
txt = re.sub(r'&#253;|&yacute;|ý', r"{y'}", txt) # y-acute
txt = re.sub(r'&#255;|&yuml;|ÿ', r'{y"}', txt) # y-umlaut
txt = re.sub('&#268;|&Ccaron;|Č', r'{Cˇ}', txt) # C-caron
txt = re.sub('&#269;|&ccaron;|č', r'{cˇ}', txt) # c-caron
txt = re.sub('&#270;|&Dcaron;|Ď', r'{Dˇ}', txt) # D-caron
txt = re.sub('&#271;|&dcaron;|ď', r'{dˇ}', txt) # d-caron
txt = re.sub('&#282;|&Ecaron;|Ě', r'{Eˇ}', txt) # E-caron
txt = re.sub('&#283;|&ecaron;|ě', r'{eˇ}', txt) # e-caron
txt = re.sub('&#313;|&Lacute;|Ĺ', r"{L'}", txt) # L-acute
txt = re.sub('&#314;|&lacute;|ĺ', r"{l'}", txt) # l-acute
txt = re.sub('&#317;|&Lcaron;|Ľ', r'{Lˇ}', txt) # L-caron
txt = re.sub('&#318;|&lcaron;|ľ', r'{lˇ}', txt) # l-caron
txt = re.sub('&#327;|&Ncaron;|Ň', r'{Nˇ}', txt) # N-caron
txt = re.sub('&#328;|&ncaron;|ň', r'{nˇ}', txt) # n-caron
txt = re.sub(r'&#268;|&Ccaron;|Č', r'{Cˇ}', txt) # C-caron
txt = re.sub(r'&#269;|&ccaron;|č', r'{cˇ}', txt) # c-caron
txt = re.sub(r'&#270;|&Dcaron;|Ď', r'{Dˇ}', txt) # D-caron
txt = re.sub(r'&#271;|&dcaron;|ď', r'{dˇ}', txt) # d-caron
txt = re.sub(r'&#282;|&Ecaron;|Ě', r'{Eˇ}', txt) # E-caron
txt = re.sub(r'&#283;|&ecaron;|ě', r'{eˇ}', txt) # e-caron
txt = re.sub(r'&#313;|&Lacute;|Ĺ', r"{L'}", txt) # L-acute
txt = re.sub(r'&#314;|&lacute;|ĺ', r"{l'}", txt) # l-acute
txt = re.sub(r'&#317;|&Lcaron;|Ľ', r'{Lˇ}', txt) # L-caron
txt = re.sub(r'&#318;|&lcaron;|ľ', r'{lˇ}', txt) # l-caron
txt = re.sub(r'&#327;|&Ncaron;|Ň', r'{Nˇ}', txt) # N-caron
txt = re.sub(r'&#328;|&ncaron;|ň', r'{nˇ}', txt) # n-caron
txt = re.sub('&#338;|&OElig;|Œ', r'{OE}', txt) # OE
txt = re.sub('&#339;|&oelig;|œ', r'{oe}', txt) # oe
txt = re.sub(r'&#338;|&OElig;|Œ', r'{OE}', txt) # OE
txt = re.sub(r'&#339;|&oelig;|œ', r'{oe}', txt) # oe
txt = re.sub('&#340;|&Racute;|Ŕ', r"{R'}", txt) # R-acute
txt = re.sub('&#341;|&racute;|ŕ', r"{r'}", txt) # r-acute
txt = re.sub('&#344;|&Rcaron;|Ř', r'{Rˇ}', txt) # R-caron
txt = re.sub('&#345;|&rcaron;|ř', r'{rˇ}', txt) # r-caron
txt = re.sub('&#348;|Ŝ', r'{S^}', txt) # S-circumflex
txt = re.sub('&#349;|ŝ', r'{s^}', txt) # s-circumflex
txt = re.sub('&#352;|&Scaron;|Š', r'{Sˇ}', txt) # S-caron
txt = re.sub('&#353;|&scaron;|š', r'{sˇ}', txt) # s-caron
txt = re.sub('&#356;|&Tcaron;|Ť', r'{Tˇ}', txt) # T-caron
txt = re.sub('&#357;|&tcaron;|ť', r'{tˇ}', txt) # t-caron
txt = re.sub('&#366;|&Uring;|Ů', r'{U°}', txt) # U-ring
txt = re.sub('&#367;|&uring;|ů', r'{u°}', txt) # u-ring
txt = re.sub('&#381;|&Zcaron;|Ž', r'{Zˇ}', txt) # Z-caron
txt = re.sub('&#382;|&zcaron;|ž', r'{zˇ}', txt) # z-caron
txt = re.sub(r'&#340;|&Racute;|Ŕ', r"{R'}", txt) # R-acute
txt = re.sub(r'&#341;|&racute;|ŕ', r"{r'}", txt) # r-acute
txt = re.sub(r'&#344;|&Rcaron;|Ř', r'{Rˇ}', txt) # R-caron
txt = re.sub(r'&#345;|&rcaron;|ř', r'{rˇ}', txt) # r-caron
txt = re.sub(r'&#348;|Ŝ', r'{S^}', txt) # S-circumflex
txt = re.sub(r'&#349;|ŝ', r'{s^}', txt) # s-circumflex
txt = re.sub(r'&#352;|&Scaron;|Š', r'{Sˇ}', txt) # S-caron
txt = re.sub(r'&#353;|&scaron;|š', r'{sˇ}', txt) # s-caron
txt = re.sub(r'&#356;|&Tcaron;|Ť', r'{Tˇ}', txt) # T-caron
txt = re.sub(r'&#357;|&tcaron;|ť', r'{tˇ}', txt) # t-caron
txt = re.sub(r'&#366;|&Uring;|Ů', r'{U°}', txt) # U-ring
txt = re.sub(r'&#367;|&uring;|ů', r'{u°}', txt) # u-ring
txt = re.sub(r'&#381;|&Zcaron;|Ž', r'{Zˇ}', txt) # Z-caron
txt = re.sub(r'&#382;|&zcaron;|ž', r'{zˇ}', txt) # z-caron
txt = re.sub('&#8226;|&bull;|•', r'{*}', txt) # bullet
txt = re.sub('&#8355;|₣', r'{Fr}', txt) # Franc
txt = re.sub('&#8356;|₤', r'{L=}', txt) # Lira
txt = re.sub('&#8360;|₨', r'{Rs}', txt) # Rupee
txt = re.sub('&#8364;|&euro;|€', r'{C=}', txt) # euro
txt = re.sub('&#8482;|&trade;|™', r'{tm}', txt) # trademark
txt = re.sub('&#9824;|&spades;|♠', r'{spade}', txt) # spade
txt = re.sub('&#9827;|&clubs;|♣', r'{club}', txt) # club
txt = re.sub('&#9829;|&hearts;|♥', r'{heart}', txt) # heart
txt = re.sub('&#9830;|&diams;|♦', r'{diamond}', txt) # diamond
txt = re.sub(r'&#8226;|&bull;|•', r'{*}', txt) # bullet
txt = re.sub(r'&#8355;|₣', r'{Fr}', txt) # Franc
txt = re.sub(r'&#8356;|₤', r'{L=}', txt) # Lira
txt = re.sub(r'&#8360;|₨', r'{Rs}', txt) # Rupee
txt = re.sub(r'&#8364;|&euro;|€', r'{C=}', txt) # euro
txt = re.sub(r'&#8482;|&trade;|™', r'{tm}', txt) # trademark
txt = re.sub(r'&#9824;|&spades;|♠', r'{spade}', txt) # spade
txt = re.sub(r'&#9827;|&clubs;|♣', r'{club}', txt) # club
txt = re.sub(r'&#9829;|&hearts;|♥', r'{heart}', txt) # heart
txt = re.sub(r'&#9830;|&diams;|♦', r'{diamond}', txt) # diamond
# Move into main code?
# txt = re.sub('\xa0', r'p. ', txt) # blank paragraph
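
Every substitution in unsmarten() is an alternation of entity references and literal characters with no backslash escapes, so the rewrite is provably behaviour-neutral here: the raw and plain spellings are the same string. A quick self-check, using one of the entity patterns above:

import re

# With no backslash in the literal, r'...' and '...' are identical strings,
# so the compiled patterns cannot differ.
assert '&#169;|&copy;|©' == r'&#169;|&copy;|©'

before = re.sub('&#169;|&copy;|©', r'{(c)}', 'Copyright © 2025')
after = re.sub(r'&#169;|&copy;|©', r'{(c)}', 'Copyright © 2025')
assert before == after == 'Copyright {(c)} 2025'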

View File

@@ -51,9 +51,9 @@ class MarkdownMLizer(OEB2HTML):
def tidy_up(self, text):
# Remove blank space form beginning of paragraph.
text = re.sub('(?msu)^[ ]{1,3}', '', text)
text = re.sub(r'(?msu)^[ ]{1,3}', '', text)
# pre has 4 spaces. We trimmed 3 so anything with a space left is a pre.
text = re.sub('(?msu)^[ ]', ' ', text)
text = re.sub(r'(?msu)^[ ]', ' ', text)
# Remove tabs that aren't at the beginning of a line
new_text = []
@@ -68,7 +68,7 @@ class MarkdownMLizer(OEB2HTML):
text = '\n'.join(new_text)
# Remove spaces from blank lines.
text = re.sub('(?msu)^[ ]+$', '', text)
text = re.sub(r'(?msu)^[ ]+$', '', text)
# Reduce blank lines
text = re.sub('(?msu)\n{7,}', '\n' * 6, text)

View File

@@ -34,7 +34,7 @@ def clean_txt(txt):
txt = re.sub('(?m)(?<=^)([ ]{2,}|\t+)(?=.)', '&nbsp;' * 4, txt)
# Condense redundant spaces
txt = re.sub('[ ]{2,}', ' ', txt)
txt = re.sub(r'[ ]{2,}', ' ', txt)
# Remove blank space from the beginning and end of the document.
txt = re.sub(r'^\s+(?=.)', '', txt)
@@ -213,7 +213,7 @@ def preserve_spaces(txt):
'''
    Replaces multiple spaces with &nbsp; entities.
'''
txt = re.sub('(?P<space>[ ]{2,})', lambda mo: ' ' + ('&nbsp;' * (len(mo.group('space')) - 1)), txt)
txt = re.sub(r'(?P<space>[ ]{2,})', lambda mo: ' ' + ('&nbsp;' * (len(mo.group('space')) - 1)), txt)
txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
return txt
@@ -325,9 +325,9 @@ def detect_formatting_type(txt):
# Check for markdown
# Headings
markdown_count += len(re.findall('(?mu)^#+', txt))
markdown_count += len(re.findall('(?mu)^=+$', txt))
markdown_count += len(re.findall('(?mu)^-+$', txt))
markdown_count += len(re.findall(r'(?mu)^#+', txt))
markdown_count += len(re.findall(r'(?mu)^=+$', txt))
markdown_count += len(re.findall(r'(?mu)^-+$', txt))
# Images
markdown_count += len(re.findall(r'(?u)!\[.*?\](\[|\()', txt))
# Links

View File

@@ -126,7 +126,7 @@ class TXTMLizer:
text = re.sub('(?<=.)\n(?=.)', ' ', text)
# Remove multiple spaces.
text = re.sub('[ ]{2,}', ' ', text)
text = re.sub(r'[ ]{2,}', ' ', text)
# Remove excessive newlines.
text = re.sub('\n[ ]+\n', '\n\n', text)
@@ -140,8 +140,8 @@ class TXTMLizer:
# Replace spaces at the beginning and end of lines
# We don't replace tabs because those are only added
# when remove paragraph spacing is enabled.
text = re.sub('(?imu)^[ ]+', '', text)
text = re.sub('(?imu)[ ]+$', '', text)
text = re.sub(r'(?imu)^[ ]+', '', text)
text = re.sub(r'(?imu)[ ]+$', '', text)
# Remove empty space and newlines at the beginning of the document.
text = re.sub(r'(?u)^[ \n]+', '', text)

View File

@@ -406,7 +406,7 @@ class SearchDialog(QDialog):
self.resize(self.sizeHint())
def retrieve_template_search(self):
template, sep, query = re.split('#@#:([tdnb]):', self.current_search_text, flags=re.IGNORECASE)
template, sep, query = re.split(r'#@#:([tdnb]):', self.current_search_text, flags=re.IGNORECASE)
self.template_value_box.setText(query)
cb = self.template_test_type_box
for idx in range(0, cb.count()):

View File

@@ -744,7 +744,7 @@ class CreateCustomColumn(QDialog):
return self.simple_error('', _('The colors box must be empty or '
'contain the same number of items as the value box'))
for tc in c:
if tc not in QColor.colorNames() and not re.match('#(?:[0-9a-f]{3}){1,4}',tc,re.I):
if tc not in QColor.colorNames() and not re.match(r'#(?:[0-9a-f]{3}){1,4}',tc,re.I):
return self.simple_error('', _('The color {0} is unknown').format(tc))
display_dict = {'enum_values': l, 'enum_colors': c}
if default_val:
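
The colour check above is a compact example of these literals: the pattern accepts 3, 6, 9 or 12 hex digits, and because re.match anchors only at the start, trailing junk would still pass. A small demonstration of that behaviour (hypothetical, outside the commit):

import re

# The same pattern as above: one to four groups of three hex digits.
hex_color = re.compile(r'#(?:[0-9a-f]{3}){1,4}', re.I)

for ok in ('#abc', '#AABBCC', '#aaabbbccc', '#aaaabbbbcccc'):
    assert hex_color.match(ok)

# re.match succeeds on a valid prefix; a stricter validator would use
# fullmatch instead.
assert hex_color.match('#abcdefXY')
assert not hex_color.fullmatch('#abcdefXY')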

View File

@@ -146,7 +146,7 @@ class EmailAccounts(QAbstractTableModel):  # {{{
if aval:
self.tags[account] = aval
elif col == 1:
self.accounts[account][0] = re.sub(',+', ',', re.sub(r'\s+', ',', as_unicode(value or '').upper()))
self.accounts[account][0] = re.sub(r',+', ',', re.sub(r'\s+', ',', as_unicode(value or '').upper()))
elif col == 0:
na = as_unicode(value or '').strip()
from email.utils import parseaddr

View File

@@ -920,6 +920,6 @@ if __name__ == '__main__':  # {{{
def callback(ed):
import regex
ed.find_text(regex.compile('A bold word'))
ed.find_text(regex.compile(r'A bold word'))
launch_editor(raw, path_is_raw=True, syntax='html', callback=callback)
# }}}

View File

@@ -3828,7 +3828,7 @@ class CatalogBuilder:
# if self.opts.numbers_as_text and re.match('[0-9]+',word[0]):
translated.append(NumberToText(word).text.capitalize())
else:
if re.match('[0-9]+', word[0]):
if re.match(r'[0-9]+', word[0]):
word = word.replace(',', '')
suffix = re.search(r'[\D]', word)
if suffix:
@@ -3844,7 +3844,7 @@ class CatalogBuilder:
translated.append(capitalize(word))
else:
if re.search('[0-9]+', word[0]):
if re.search(r'[0-9]+', word[0]):
word = word.replace(',', '')
suffix = re.search(r'[\D]', word)
if suffix:
@@ -4114,7 +4114,7 @@ class CatalogBuilder:
Return:
(str): char if A-z, else SYMBOLS
'''
if not re.search('[a-zA-Z]', ascii_text(char)):
if not re.search(r'[a-zA-Z]', ascii_text(char)):
return self.SYMBOLS
else:
return char

View File

@@ -87,7 +87,7 @@ class NumberToText:  # {{{
self.log('numberTranslate(): %s' % self.number)
# Special case ordinals
if re.search('[st|nd|rd|th]',self.number):
if re.search(r'[st|nd|rd|th]',self.number):
self.number = self.number.replace(',', '')
ordinal_suffix = re.search(r'[\D]', self.number)
ordinal_number = re.sub(r'\D','',self.number.replace(',', ''))
@@ -134,7 +134,7 @@ class NumberToText:  # {{{
self.log('Hyphenated: %s' % self.number)
self.number_as_float = self.number.split('-')[0]
strings = self.number.split('-')
if re.search('[0-9]+', strings[0]):
if re.search(r'[0-9]+', strings[0]):
left = NumberToText(strings[0]).text
right = strings[1]
else:
@@ -143,7 +143,7 @@ class NumberToText:  # {{{
self.text = f'{left}-{right}'
# Test for only commas and numbers
elif ',' in self.number and not re.search('[^0-9,]',self.number):
elif ',' in self.number and not re.search(r'[^0-9,]',self.number):
if self.verbose:
self.log('Comma(s): %s' % self.number)
self.number_as_float = self.number.replace(',', '')

View File

@@ -1504,11 +1504,11 @@ def text_to_tokens(text):
text = match.group(1)
OR = True
tokens = []
quot = re.search('"(.*?)"', text)
quot = re.search(r'"(.*?)"', text)
while quot:
tokens.append(quot.group(1))
text = text.replace('"'+quot.group(1)+'"', '')
quot = re.search('"(.*?)"', text)
quot = re.search(r'"(.*?)"', text)
tokens += text.split(' ')
ans = []
for i in tokens:

View File

@@ -2556,7 +2556,7 @@ class BibTeX:
self.invalid_cit = re.compile('[ "@\',\\#}{~%&$^]')
self.upper = re.compile('[' +
string.ascii_uppercase + ']')
self.escape = re.compile('[#&%_]')
self.escape = re.compile(r'[#&%_]')
def ValidateCitationKey(self, text):
'''

View File

@@ -59,7 +59,7 @@ def get_opts_from_parser(parser, prefix):
def send(ans):
pat = re.compile('([^0-9a-zA-Z_./-])')
pat = re.compile(r'([^0-9a-zA-Z_./-])')
for x in sorted(set(ans)):
x = pat.sub(lambda m : '\\'+m.group(1), x)
if x.endswith('\\ '):

View File

@@ -384,7 +384,7 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
repl_func = partial(fd_repl_func, dt, 'ap' in format.lower())
return re.sub(
'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
r'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
repl_func, format)
# }}}
@@ -460,7 +460,7 @@ def clean_date_for_sort(dt, fmt=None):
'min':UNDEFINED_DATE.minute, 'sec':UNDEFINED_DATE.second}
repl_func = partial(cd_repl_func, tt, dt)
re.sub('(s{1,2})|(m{1,2})|(h{1,2})|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, fmt)
re.sub(r'(s{1,2})|(m{1,2})|(h{1,2})|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, fmt)
return dt.replace(year=tt['year'], month=tt['mon'], day=tt['day'], hour=tt['hour'],
minute=tt['min'], second=tt['sec'], microsecond=0)
# }}}
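
The two date hunks share one idiom: a single alternation of format tokens plus a replacement callable, so all the interesting logic lives in the repl function rather than the pattern. A toy version of that idiom, with repl standing in for calibre's fd_repl_func (a hypothetical simplification):

import re
from datetime import datetime

def repl(dt, m):
    # Map each matched format token to its rendered value.
    token = m.group()
    return {'yyyy': f'{dt.year:04d}', 'yy': f'{dt.year % 100:02d}',
            'MM': f'{dt.month:02d}', 'dd': f'{dt.day:02d}'}.get(token, token)

dt = datetime(2025, 1, 24)
out = re.sub(r'(d{1,4}|M{1,4}|(?:yyyy|yy))', lambda m: repl(dt, m), 'dd/MM/yyyy')
assert out == '24/01/2025'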

View File

@@ -90,7 +90,7 @@ def get_system_locale():
def sanitize_lang(lang):
if lang:
match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
match = re.match(r'[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
if match:
lang = match.group()
if lang == 'zh':

View File

@@ -195,7 +195,7 @@ class Parser:
def tokenize(self, expr):
# convert docstrings to base64 to avoid all processing. Change the docstring
# indicator to something unique with no characters special to the parser.
expr = re.sub('(""")(..*?)(""")',
expr = re.sub(r'(""")(..*?)(""")',
lambda mo: self.docstring_sep + as_hex_unicode(mo.group(2)) + self.docstring_sep,
expr, flags=re.DOTALL)
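
The tokenizer hunk hides docstrings behind an opaque sentinel before any further parsing, so characters special to the parser never leak out of a docstring. A toy reconstruction, with SEP and hex encoding standing in for calibre's docstring_sep and as_hex_unicode (both assumptions):

import re
from binascii import hexlify

SEP = '\x00'  # assumed unique sentinel, like docstring_sep

def hide_docstrings(expr):
    # Hex-encode each triple-quoted body so later passes see none of the
    # characters that are special to the tokenizer.
    return re.sub(r'(""")(..*?)(""")',
                  lambda mo: SEP + hexlify(mo.group(2).encode()).decode() + SEP,
                  expr, flags=re.DOTALL)

hidden = hide_docstrings('f("""a*b""")')
assert '"""' not in hidden and 'a*b' not in hidden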

View File

@@ -1730,7 +1730,7 @@ class BasicNewsRecipe(Recipe):
def error_in_article_download(self, request, traceback):
self.jobs_done += 1
if traceback and re.search('^AbortArticle:', traceback, flags=re.M) is not None:
if traceback and re.search(r'^AbortArticle:', traceback, flags=re.M) is not None:
self.log.warn('Aborted download of article:', request.article.title,
'from', request.article.url)
self.report_progress(float(self.jobs_done)/len(self.jobs),

View File

@@ -59,7 +59,7 @@ def styleFromList(styleName, specArray, spacing, showAllLevels):
numbered = False
displayLevels = 0
listStyle = ListStyle(name=styleName)
numFormatPattern = re.compile('([1IiAa])')
numFormatPattern = re.compile(r'([1IiAa])')
cssLengthPattern = re.compile('([^a-z]+)\\s*([a-z]+)?')
m = cssLengthPattern.search(spacing)
if (m is not None):