always use raw-string for regex (auto-fix)

ruff 'RUF039'
un-pogaz 2025-01-24 11:14:20 +01:00
parent 567a0187f3
commit ac6912565a
97 changed files with 315 additions and 314 deletions
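
For context: RUF039 flags a regular-expression pattern written as a plain
string literal and rewrites it as a raw string, so backslashes read exactly
as the regex engine receives them instead of first passing through Python's
string-escape rules. A minimal before/after sketch of what the autofix does
(illustrative only, not taken from this commit):

    import re

    # Before: a plain literal. '\d' only works because Python passes
    # unrecognized escapes through unchanged, and doing so raises a
    # SyntaxWarning on Python 3.12+.
    before = re.compile('\d+')

    # After: the raw string the autofix produces; the compiled pattern is
    # identical, but the backslash is now explicit.
    after = re.compile(r'\d+')

    assert before.pattern == after.pattern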

View File

@@ -49,7 +49,7 @@ def merge():
clone_node(child, symbol)
ans.append(symbol)
ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
-ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
+ans = re.sub(r'<svg[^>]+>', '<svg style="display:none">', ans, count=1)
return ans

View File

@@ -29,6 +29,6 @@ class AlejaKomiksu(BasicNewsRecipe):
def skip_ad_pages(self, soup):
tag = soup.find(attrs={'class': 'rodzaj'})
if tag and tag.a.string.lower().strip() == 'recenzje':
-link = soup.find(text=re.compile('recenzuje'))
+link = soup.find(text=re.compile(r'recenzuje'))
if link:
return self.index_to_soup(link.parent['href'], raw=True)

View File

@@ -63,12 +63,12 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
dict(
attrs={'class': ['socialbar', 'social-sharing flank', 'vel', 'back']}),
dict(name='img', attrs={'alt': 'logo'}),
-dict(name='div', attrs={'class': re.compile('infoEl')}),
-dict(name='span', attrs={'class': re.compile('loupe')})
+dict(name='div', attrs={'class': re.compile(r'infoEl')}),
+dict(name='span', attrs={'class': re.compile(r'loupe')})
]
remove_tags_after = [
-dict(name='div', attrs={'itemprop': re.compile('articleBody')})
+dict(name='div', attrs={'itemprop': re.compile(r'articleBody')})
]
def preprocess_html(self, soup):

View File

@@ -58,7 +58,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghammail.co.uk')
cov = soup.find(attrs={'src': re.compile(
-'http://images.icnetwork.co.uk/upl/birm')})
+r'http://images.icnetwork.co.uk/upl/birm')})
cov = str(cov)
cov2 = re.findall(
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)

View File

@@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
-keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
+keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
-dict(name='div', attrs={'id': re.compile('flyer')}),
+dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):

View File

@@ -39,12 +39,12 @@ class CSMonitor(BasicNewsRecipe):
}
remove_tags = [
-dict(name=['meta', 'link', 'iframe', 'object', 'embed']), dict(attrs={'class': re.compile('(^|| )podStoryRel($|| )', re.DOTALL)}), dict(
+dict(name=['meta', 'link', 'iframe', 'object', 'embed']), dict(attrs={'class': re.compile(r'(^|| )podStoryRel($|| )', re.DOTALL)}), dict(
attrs={'class': ['bottom-rel', 'hide']}), dict(attrs={'id': ['pgallerycarousel_enlarge', 'pgallerycarousel_related']})
]
keep_only_tags = [
dict(name='h1', attrs={'class': 'head'}), dict(name='h2', attrs={'class': 'subhead'}), dict(attrs={'class': [
-'sByline', 'thePhoto', 'ui-body-header']}), dict(attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
+'sByline', 'thePhoto', 'ui-body-header']}), dict(attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
]
remove_attributes = ['xmlns:fb']
@@ -74,11 +74,11 @@ class CSMonitor(BasicNewsRecipe):
nurl = 'http://www.csmonitor.com' + nexttag['href']
soup2 = self.index_to_soup(nurl)
texttag = soup2.find(
-attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
+attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
if texttag:
appendtag = soup.find(
-attrs={'class': re.compile('(^|| )sBody($|| )', re.DOTALL)})
-for citem in texttag.findAll(attrs={'class': [re.compile('(^|| )podStoryRel($|| )', re.DOTALL), 'bottom-rel', 'hide']}):
+attrs={'class': re.compile(r'(^|| )sBody($|| )', re.DOTALL)})
+for citem in texttag.findAll(attrs={'class': [re.compile(r'(^|| )podStoryRel($|| )', re.DOTALL), 'bottom-rel', 'hide']}):
citem.extract()
self.append_page(soup2)
texttag.extract()

View File

@@ -47,7 +47,7 @@ class Chronicle(BasicNewsRecipe):
# Find cover
cover = soup0.find('div', attrs={
-'class': 'side-content'}).find(attrs={'src': re.compile('photos/biz/Current')})
+'class': 'side-content'}).find(attrs={'src': re.compile(r'photos/biz/Current')})
if cover is not None:
if 'chronicle.com' in cover['src']:
self.cover_url = cover['src']

View File

@@ -86,7 +86,7 @@ class CourrierInternational(BasicNewsRecipe):
return br
def preprocess_html(self, soup):
-for link in soup.findAll('a', href=re.compile('^/')):
+for link in soup.findAll('a', href=re.compile(r'^/')):
link['href'] = 'http://www.courrierinternational.com' + link['href']
return soup

View File

@@ -71,10 +71,10 @@ class AdvancedUserRecipe1467571059(BasicNewsRecipe):
remove_tags = [
dict(name=['embed', 'object']),
dict(name='div', attrs={'class':['note NotePortrait', 'note']}),
-dict(name='ul', attrs={'class':re.compile('article__share')}),
+dict(name='ul', attrs={'class':re.compile(r'article__share')}),
dict(name='div', attrs={'class':'slideshow__controls'}),
dict(name='a', attrs={'role':'button'}),
-dict(name='figure', attrs={'class':re.compile('video')})
+dict(name='figure', attrs={'class':re.compile(r'video')})
]
remove_attributes = ['width', 'height']

View File

@@ -31,9 +31,9 @@ class deredactie(BasicNewsRecipe):
catnames = {}
soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch')
-for elem in soup.findAll('li', attrs={'id': re.compile('^navItem[2-9]')}):
+for elem in soup.findAll('li', attrs={'id': re.compile(r'^navItem[2-9]')}):
a = elem.find('a', href=True)
-m = re.search('(?<=/)[^/]*$', a['href'])
+m = re.search(r'(?<=/)[^/]*$', a['href'])
cat = str(m.group(0))
categories.append(cat)
catnames[cat] = a['title']
@@ -45,7 +45,7 @@ class deredactie(BasicNewsRecipe):
articles = []
soup = self.index_to_soup(
'http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat)
-for a in soup.findAll('a', attrs={'href': re.compile('deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}):
+for a in soup.findAll('a', attrs={'href': re.compile(r'deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_')}):
skip_this_article = False
url = a['href'].strip()
if url.startswith('/'):

View File

@@ -51,7 +51,7 @@ class Donga(BasicNewsRecipe):
# https://www.donga.com/news/[sections]/article/all/[date]/[gid]/1
# Return print version url with syntax:
# https://www.donga.com/news/View?gid=[gid]&date=[date]
-reobject = re.search('(?<=/all/)([0-9]*)/([0-9]*)', url)
+reobject = re.search(r'(?<=/all/)([0-9]*)/([0-9]*)', url)
date = reobject.group(1)
gid = reobject.group(2)

View File

@@ -33,7 +33,7 @@ class dwutygodnik(BasicNewsRecipe):
browser.open('http://www.dwutygodnik.com/')
# find the link
-epublink = browser.find_link(text_regex=re.compile('Wydanie EPUB'))
+epublink = browser.find_link(text_regex=re.compile(r'Wydanie EPUB'))
# download ebook
self.report_progress(0, _('Downloading ePUB'))

View File

@@ -21,8 +21,8 @@ class Dziennik_pl(BasicNewsRecipe):
remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'}
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}'
-preprocess_regexps = [(re.compile('Komentarze:'), lambda m: ''), (re.compile(
-'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
+preprocess_regexps = [(re.compile(r'Komentarze:'), lambda m: ''), (re.compile(
+r'<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
keep_only_tags = [dict(id='article')]
remove_tags = [dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class': ['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] # noqa: E501
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),

View File

@@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
-keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
+keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
-dict(name='div', attrs={'id': re.compile('flyer')}),
+dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):

View File

@@ -51,7 +51,7 @@ class Esensja(BasicNewsRecipe):
def parse_index(self):
soup = self.index_to_soup('http://www.esensja.pl/magazyn/')
-a = soup.find('a', attrs={'href': re.compile('.*/index.html')})
+a = soup.find('a', attrs={'href': re.compile(r'.*/index.html')})
year = a['href'].split('/')[0]
month = a['href'].split('/')[1]
self.HREF = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/iso/'
@@ -149,7 +149,7 @@ class Esensja(BasicNewsRecipe):
info = tag.find(attrs={'class': 'img_info'})
text = str(tag)
if not src:
-src = re.search('src="[^"]*?"', text)
+src = re.search(r'src="[^"]*?"', text)
if src:
src = src.group(0)
src = src[5:].replace('//', '/')

View File

@@ -95,7 +95,7 @@ class EsensjaRSS(BasicNewsRecipe):
info = tag.find(attrs={'class': 'img_info'})
text = str(tag)
if not src:
-src = re.search('src="[^"]*?"', text)
+src = re.search(r'src="[^"]*?"', text)
if src:
src = src.group(0)
src = src[5:].replace('//', '/')

View File

@@ -109,7 +109,7 @@ img { background: none !important; float: none; margin: 0px; }
for post in soup.findAll('a'):
strpost = str(post)
-if re.match('<a href="https://www1.folha.uol.com.br/.*/"><svg aria-hidden="true" class="icon icon--star"', strpost):
+if re.match(r'<a href="https://www1.folha.uol.com.br/.*/"><svg aria-hidden="true" class="icon icon--star"', strpost):
if articles:
feeds.append((section_title, articles))
self.log()

View File

@@ -39,7 +39,7 @@ class AdvancedUserRecipe1515196393(BasicNewsRecipe):
feeds = []
br = self.get_browser()
self.ctdir = PersistentTemporaryDirectory()
-for x in toc.findAll(['li'], attrs={'class': re.compile('.*get_content.*')}):
+for x in toc.findAll(['li'], attrs={'class': re.compile(r'.*get_content.*')}):
edwo = x.find('a')
title = self.tag_to_string(edwo)
self.log('\t\tFound article:', title)

View File

@@ -54,7 +54,7 @@ class GN(BasicNewsRecipe):
}]
feeds.append((u'Na dobry początek', articles))
# columns:
-for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}):
+for addr in soup.findAll('a', attrs={'href': re.compile(r'kategoria')}):
if not addr.span:
main_block = self.index_to_soup(
'http://www.gosc.pl' + addr['href'])

View File

@@ -50,7 +50,7 @@ class GN(BasicNewsRecipe):
}]
feeds.append((u'Na dobry początek', articles))
# columns:
-for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}):
+for addr in soup.findAll('a', attrs={'href': re.compile(r'kategoria')}):
if not addr.span:
main_block = self.index_to_soup(
'http://www.gosc.pl' + addr['href'])

View File

@@ -50,10 +50,10 @@ class GazetvanAntwerpen(BasicNewsRecipe):
remove_tags = [
dict(name=['embed', 'object']),
dict(name='div', attrs={'class': ['note NotePortrait', 'note']}),
-dict(name='ul', attrs={'class': re.compile('article__share')}),
+dict(name='ul', attrs={'class': re.compile(r'article__share')}),
dict(name='div', attrs={'class': 'slideshow__controls'}),
dict(name='a', attrs={'role': 'button'}),
-dict(name='figure', attrs={'class': re.compile('video')})
+dict(name='figure', attrs={'class': re.compile(r'video')})
]
remove_attributes = ['width', 'height']

View File

@@ -78,7 +78,7 @@ class HNWithCommentsLink(BasicNewsRecipe):
br = td.find('br')
if br:
br.extract()
-reply = td.find('a', attrs={'href': re.compile('^reply?')})
+reply = td.find('a', attrs={'href': re.compile(r'^reply?')})
if reply:
reply.parent.extract()
td.name = 'div'

View File

@@ -59,7 +59,7 @@ class Handelsblatt(BasicNewsRecipe):
dict(name='aside', attrs={'class': ['vhb-article-element vhb-left',
'vhb-article-element vhb-left vhb-teasergallery',
'vhb-article-element vhb-left vhb-shorttexts']}),
-dict(name='aside', attrs={'class': re.compile('vhb-club-events')}),
+dict(name='aside', attrs={'class': re.compile(r'vhb-club-events')}),
dict(name='article', attrs={'class': ['vhb-imagegallery vhb-teaser',
'vhb-teaser vhb-type-video']}),
dict(name='small', attrs={'class': ['vhb-credit']}),
@@ -70,14 +70,14 @@ class Handelsblatt(BasicNewsRecipe):
'opinary-widget-wrapper',
'vhb-article__content-element--shorttextgallery',
'vhb-hollow-area vhb-hollow-area--col-1']}),
-dict(name='div', attrs={'class': re.compile('stepstone')}),
-dict(name='div', attrs={'class': re.compile('vhb-imagegallery')}),
+dict(name='div', attrs={'class': re.compile(r'stepstone')}),
+dict(name='div', attrs={'class': re.compile(r'vhb-imagegallery')}),
dict(name='div', attrs={'id': ['highcharts_infografik']}),
-dict(name='div', attrs={'id': re.compile('dax-sentiment')}),
-dict(name=['div', 'section'], attrs={'class': re.compile('slider')}),
+dict(name='div', attrs={'id': re.compile(r'dax-sentiment')}),
+dict(name=['div', 'section'], attrs={'class': re.compile(r'slider')}),
dict(name='a', attrs={'class': ['twitter-follow-button']}),
dict(name='img', attrs={'class': ['highlight-icon', 'lb-author__avatar', 'pin-icon']}),
-dict(name='img', attrs={'alt': re.compile('Handelsblatt Morning Briefing')}),
+dict(name='img', attrs={'alt': re.compile(r'Handelsblatt Morning Briefing')}),
dict(name=['blockquote', 'button', 'link'])
]
@@ -138,7 +138,7 @@ class Handelsblatt(BasicNewsRecipe):
def postprocess_html(self, soup, first_fetch):
# convert lists of author(s) and date(s) into simple text
-for cap in soup.find_all('div', {'class': re.compile('vhb-article-caption')}):
+for cap in soup.find_all('div', {'class': re.compile(r'vhb-article-caption')}):
cap.replace_with(cap.encode_contents().decode('utf-8').strip() + ' ')
for row in soup.find_all('div', {'class': 'vhb-article-author-row'}):
for ul in row.find_all('ul'):
@@ -160,7 +160,7 @@ class Handelsblatt(BasicNewsRecipe):
fig.find('div', {'class': 'vhb-caption'}).replace_with(cap)
# remove references to related articles
for strong in soup.find_all('strong'):
-if strong.string and (re.match('^Mehr:? ?', strong.string) or re.match('^>>.*', strong.string)):
+if strong.string and (re.match(r'^Mehr:? ?', strong.string) or re.match(r'^>>.*', strong.string)):
p_parent = strong.find_parent('p')
if p_parent:
p_parent.decompose()

View File

@@ -49,7 +49,7 @@ class HistoryToday(BasicNewsRecipe):
# Go to issue
soup = self.index_to_soup('https://www.historytoday.com/contents')
cover = soup.find('div', attrs={
-'id': 'content-area'}).find('img', attrs={'src': re.compile('.*cover.*')})['src']
+'id': 'content-area'}).find('img', attrs={'src': re.compile(r'.*cover.*')})['src']
self.cover_url = cover
self.log(self.cover_url)

View File

@@ -89,7 +89,7 @@ class IndiaToday(BasicNewsRecipe):
return soup
def preprocess_raw_html(self, raw, *a):
-m = re.search('id="__NEXT_DATA__" type="application/json">', raw)
+m = re.search(r'id="__NEXT_DATA__" type="application/json">', raw)
raw = raw[m.start():]
raw = raw.split('>', 1)[1]
data = json.JSONDecoder().raw_decode(raw)[0]

View File

@@ -36,7 +36,7 @@ class JoopRecipe(BasicNewsRecipe):
keep_only_tags.append(
dict(name='h2', attrs={'class': 'columnhead smallline'}))
keep_only_tags.append(
-dict(name='div', attrs={'class': re.compile('article.*')}))
+dict(name='div', attrs={'class': re.compile(r'article.*')}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}

View File

@@ -44,16 +44,16 @@ class Kurier(BasicNewsRecipe):
]
keep_only_tags = [
-dict(name='article', attrs={'class': re.compile('main-article')})
+dict(name='article', attrs={'class': re.compile(r'main-article')})
]
remove_tags = [
dict(name='div', attrs={'class': 'social-media-container'}),
dict(name='section', attrs={'class': 'tags'}),
-dict(name='section', attrs={'class': re.compile('comment-box')}),
-dict(name='section', attrs={'class': re.compile('related-content')}),
-dict(name='section', attrs={'class': re.compile('article-slider')}),
-dict(name='section', attrs={'class': re.compile('commentcontainer')}),
+dict(name='section', attrs={'class': re.compile(r'comment-box')}),
+dict(name='section', attrs={'class': re.compile(r'related-content')}),
+dict(name='section', attrs={'class': re.compile(r'article-slider')}),
+dict(name='section', attrs={'class': re.compile(r'commentcontainer')}),
dict(name='blockquote')
]

View File

@@ -21,7 +21,7 @@ class Kyungyhang(BasicNewsRecipe):
remove_javascript = True
preprocess_regexps = [
-(re.compile("<div class='ad_movFocus'.*</html>",
+(re.compile(r"<div class='ad_movFocus'.*</html>",
re.DOTALL | re.IGNORECASE), lambda match: '</html>'),
]

View File

@@ -121,7 +121,7 @@ class LeMondeAbonne(BasicNewsRecipe):
files = os.listdir(path)
nb_index_files = len([
-name for name in files if re.match('frame_gauche_[0-9]+.html', name)
+name for name in files if re.match(r'frame_gauche_[0-9]+.html', name)
])
flux = []

View File

@@ -144,7 +144,7 @@ class WeeklyLWN(BasicNewsRecipe):
# Most articles have anchors in their titles, *except* the
# security vulnerabilities
article_anchor = curr.find(
-name='a', attrs={'href': re.compile('^/Articles/')})
+name='a', attrs={'href': re.compile(r'^/Articles/')})
if article_anchor:
article_url = article_anchor.get('href')

View File

@@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
-keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
+keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
-dict(name='div', attrs={'id': re.compile('flyer')}),
+dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):

View File

@@ -71,21 +71,21 @@ class Newsweek(BasicNewsRecipe):
strong = p.find('strong')
if strong:
newest = re.compile(
-'Tekst pochodzi z najnowszego numeru Tygodnika Newsweek')
+r'Tekst pochodzi z najnowszego numeru Tygodnika Newsweek')
if newest.search(str(strong)):
strong.extract()
continue
itunes = p.find('a')
if itunes:
-reurl = re.compile('itunes.apple.com')
+reurl = re.compile(r'itunes.apple.com')
if reurl.search(str(itunes['href'])):
p.extract()
continue
imagedesc = p.find('div', attrs={'class': 'image-desc'})
if imagedesc:
-redesc = re.compile('Okładka numeru')
+redesc = re.compile(r'Okładka numeru')
if (redesc.search(str(imagedesc))):
p.extract()
continue

View File

@@ -77,10 +77,10 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
print('-------------------------get index of paper--------------------------------')
result = []
soup = self.index_to_soup('http://www.nikkei.com/paper/')
-sections = soup.findAll(attrs={'class': re.compile('.*cmn-article_title.*')})
+sections = soup.findAll(attrs={'class': re.compile(r'.*cmn-article_title.*')})
for sect in sections:
-sect_title = sect.find(attrs={'class' : re.compile('.*cmnc-((large)|(middle)|(small)).*')})
+sect_title = sect.find(attrs={'class' : re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
if sect_title is None:
continue
sect_title = sect_title.contents[0]

View File

@@ -62,7 +62,7 @@ class NRCNext(BasicNewsRecipe):
zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
zfile.extractall(self.output_dir)
namelist = zfile.namelist()
-emre = re.compile('&lt;em(?:.*)&gt;(.*)&lt;/em&gt;')
+emre = re.compile(r'&lt;em(?:.*)&gt;(.*)&lt;/em&gt;')
subst = '\\1'
for name in namelist:
_, ext = os.path.splitext(name)

View File

@@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
-keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
+keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
-dict(name='div', attrs={'id': re.compile('flyer')}),
+dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):

View File

@@ -48,7 +48,7 @@ class outlook(BasicNewsRecipe):
return [('Articles', ans)]
def preprocess_raw_html(self, raw, *a):
-m = re.search('id="__NEXT_DATA__" type="application/json">', raw)
+m = re.search(r'id="__NEXT_DATA__" type="application/json">', raw)
raw = raw[m.start():]
raw = raw.split('>', 1)[1]
data = json.JSONDecoder().raw_decode(raw)[0]

View File

@@ -41,9 +41,9 @@ class Polter(BasicNewsRecipe):
(u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')]
def preprocess_html(self, soup):
-for s in soup.findAll(attrs={'style': re.compile('float: ?left')}):
+for s in soup.findAll(attrs={'style': re.compile(r'float: ?left')}):
s['class'] = 'floatleft'
-for s in soup.findAll(attrs={'style': re.compile('float: ?right')}):
+for s in soup.findAll(attrs={'style': re.compile(r'float: ?right')}):
s['class'] = 'floatright'
for s in soup.findAll(style=True):
if 'bold;' in s['style']:

View File

@@ -161,9 +161,9 @@ class PrivateEyeRecipe(BasicNewsRecipe):
{'name': 'div', 'attrs': {'id': 'about-covers'}},
{'name': 'a', ' attrs': {'href': 'https://shop.private-eye.co.uk'}},
{'name': 'iframe'},
-{'name': 'link', 'attrs': {'href': re.compile('/javastyle/lightbox/')}},
-{'name': 'link', 'attrs': {'href': re.compile('/javastyle/news_ticker/')}},
-{'name': 'link', 'attrs': {'href': re.compile('/javastyle/media-queries-')}},
+{'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/lightbox/')}},
+{'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/news_ticker/')}},
+{'name': 'link', 'attrs': {'href': re.compile(r'/javastyle/media-queries-')}},
]
# Convert headers to h1, strapline to h4

View File

@@ -54,7 +54,7 @@ class ScienceNewsIssue(BasicNewsRecipe):
# Get articles
soup = self.index_to_soup(url)
soup = soup.find('main', attrs={'id':'content'})
-re_article = re.compile('https://www.sciencenews.org/article/')
+re_article = re.compile(r'https://www.sciencenews.org/article/')
stories = []
past_urls = set()
for sec in soup.find_all(href=re_article):

View File

@@ -76,8 +76,8 @@ class SolHaberRecipe(BasicNewsRecipe):
result = []
articles_dict = {}
-author_regexp = re.compile('^http://.*?/yazarlar/(.*?)/.*$')
-category_regexp = re.compile('^http://.*?/(.+?)/.*$')
+author_regexp = re.compile(r'^http://.*?/yazarlar/(.*?)/.*$')
+category_regexp = re.compile(r'^http://.*?/(.+?)/.*$')
for section_tuple in self.section_tuples:

View File

@@ -43,7 +43,7 @@ class StandardMediaKeRecipe(BasicNewsRecipe):
def print_version(self, url):
import re
-p = re.compile('http://www.standardmedia.co.ke/.*InsidePage.php')
+p = re.compile(r'http://www.standardmedia.co.ke/.*InsidePage.php')
return p.sub('http://www.standardmedia.co.ke/print.php', url)
def preprocess_html(self, soup):

View File

@@ -89,7 +89,7 @@ class TheAge(BasicNewsRecipe):
for i in soup.findAll('a'):
href = i['href']
-if href and re.match('http://www.theage.com.au/frontpage/[0-9]+/[0-9]+/[0-9]+/frontpage.pdf', href):
+if href and re.match(r'http://www.theage.com.au/frontpage/[0-9]+/[0-9]+/[0-9]+/frontpage.pdf', href):
return href
return None

View File

@@ -92,7 +92,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
# 1. Title. By author
#.2. Title by author: subtitle
# 3. Title: author: subtitle
-title_author_re = re.compile('^(.*?)(?:(?: by )|(?:: ))(.*?): (.*?)$')
+title_author_re = re.compile(r'^(.*?)(?:(?: by )|(?:: ))(.*?): (.*?)$')
# Separate author from title (where it is specified)
def title_author(self, head):

View File

@@ -38,7 +38,7 @@ class Tweakers(BasicNewsRecipe):
'class': ['sidebar', 'advertorial']
},
{
-'class': re.compile('nextPrevious')
+'class': re.compile(r'nextPrevious')
},
]
no_stylesheets = True

View File

@@ -126,7 +126,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
-keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
+keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -140,7 +140,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
-dict(name='div', attrs={'id': re.compile('flyer')}),
+dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):

View File

@@ -127,7 +127,7 @@ class CanWestPaper(BasicNewsRecipe):
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
-keep_only_tags = [dict(name='div', attrs={'id': re.compile('story')})]
+keep_only_tags = [dict(name='div', attrs={'id': re.compile(r'story')})]
remove_tags = [{'class': 'comments'}, {'class': 'comment-intro'}, {'class': 'storytab'},
dict(name='div', attrs={'class': 'section_title'}), dict(name='div', attrs={'class': 'sharebar'}), dict(
@@ -141,7 +141,7 @@ class CanWestPaper(BasicNewsRecipe):
name='div', attrs={'class': 'copyright'}),
dict(name='div', attrs={'class': 'rule_grey_solid'}),
dict(name='div', attrs={'id': 'soundoff'}),
-dict(name='div', attrs={'id': re.compile('flyer')}),
+dict(name='div', attrs={'id': re.compile(r'flyer')}),
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
def get_cover_url(self):

View File

@@ -82,28 +82,28 @@ class TimesColonist(BasicNewsRecipe):
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
'''
keep_only_tags = [
-dict(name='div', attrs={'class': re.compile('main.content')})]
+dict(name='div', attrs={'class': re.compile(r'main.content')})]
def __init__(self, options, log, progress_reporter):
self.remove_tags = [{'class': 'comments'},
{'id': 'photocredit'},
dict(name='div', attrs={
-'class': re.compile('top.controls')}),
+'class': re.compile(r'top.controls')}),
dict(name='div', attrs={
-'class': re.compile('^comments')}),
+'class': re.compile(r'^comments')}),
dict(name='div', attrs={
-'class': re.compile('social')}),
+'class': re.compile(r'social')}),
dict(name='div', attrs={
-'class': re.compile('tools')}),
+'class': re.compile(r'tools')}),
dict(name='div', attrs={
-'class': re.compile('bottom.tools')}),
+'class': re.compile(r'bottom.tools')}),
dict(name='div', attrs={
-'class': re.compile('window')}),
-dict(name='div', attrs={'class': re.compile('related.news.element')})]
+'class': re.compile(r'window')}),
+dict(name='div', attrs={'class': re.compile(r'related.news.element')})]
print('PROFILE NAME = ' + options.output_profile.short_name)
if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
self.remove_tags.append(
-dict(name='div', attrs={'class': re.compile('image-container')}))
+dict(name='div', attrs={'class': re.compile(r'image-container')}))
BasicNewsRecipe.__init__(self, options, log, progress_reporter)
def get_cover_url(self):
@@ -173,19 +173,19 @@ class TimesColonist(BasicNewsRecipe):
return soup
def preprocess_html(self, soup):
-byline = soup.find('p', attrs={'class': re.compile('ancillary')})
+byline = soup.find('p', attrs={'class': re.compile(r'ancillary')})
if byline is not None:
authstr = self.tag_to_string(byline, False)
-authstr = re.sub('/ *Times Colonist', '/',
+authstr = re.sub(r'/ *Times Colonist', '/',
authstr, flags=re.IGNORECASE)
-authstr = re.sub('BY */', '', authstr, flags=re.IGNORECASE)
+authstr = re.sub(r'BY */', '', authstr, flags=re.IGNORECASE)
newdiv = new_tag(soup, 'div')
newdiv.insert(0, authstr)
newdiv['class'] = 'byline'
byline.replaceWith(newdiv)
-for caption in soup.findAll('p', attrs={'class': re.compile('caption')}):
+for caption in soup.findAll('p', attrs={'class': re.compile(r'caption')}):
capstr = self.tag_to_string(caption, False)
-capstr = re.sub('Photograph by.*$', '',
+capstr = re.sub(r'Photograph by.*$', '',
capstr, flags=re.IGNORECASE)
newdiv = new_tag(soup, 'div')
newdiv.insert(0, capstr)
@@ -239,13 +239,13 @@ class TimesColonist(BasicNewsRecipe):
except:
return ans
mainsoup = soup.find(
-'div', attrs={'class': re.compile('main.content')})
+'div', attrs={'class': re.compile(r'main.content')})
article_list = []
-for wdiv in mainsoup.findAll('div', attrs={'id': re.compile('featured.story')}):
+for wdiv in mainsoup.findAll('div', attrs={'id': re.compile(r'featured.story')}):
for htag in wdiv.findAll('h3'):
self.handle_articles(htag, article_list, sectitle)
-for ladiv in mainsoup.findAll(attrs={'class': re.compile('leading.articles')}):
-for wdiv in mainsoup.findAll('div', attrs={'class': re.compile('article.row')}):
+for ladiv in mainsoup.findAll(attrs={'class': re.compile(r'leading.articles')}):
+for wdiv in mainsoup.findAll('div', attrs={'class': re.compile(r'article.row')}):
for htag in wdiv.findAll('h2'):
self.handle_articles(htag, article_list, sectitle)
ans.append((sectitle, article_list))

View File

@@ -139,7 +139,7 @@ class ZeitDe(BasicNewsRecipe):
body.insert(0, header)
# Add real img tags for images
-for container in soup.findAll(class_=re.compile('__media-container$')):
+for container in soup.findAll(class_=re.compile(r'__media-container$')):
img = container.find('noscript')
if img is not None:
img.name = 'div'

View File

@@ -200,11 +200,11 @@ class ZeitEPUBAbo(BasicNewsRecipe):
# browser.follow_link(abolink)
# find page for latest issue
latestlink = browser.find_link(text_regex=re.compile(
-'.*ZUR AKTUELLEN AUSGABE.*'))
+r'.*ZUR AKTUELLEN AUSGABE.*'))
browser.follow_link(latestlink)
# now find the correct file, we will still use the ePub file
epublink = browser.find_link(text_regex=re.compile(
-'.*EPUB F.*R E-READER LADEN.*')) # change from '.*EPUB FÜR E-READER LADEN.*' in May 2017
+r'.*EPUB F.*R E-READER LADEN.*')) # change from '.*EPUB FÜR E-READER LADEN.*' in May 2017
response = browser.follow_link(epublink)
self.report_progress(1, _('next step'))
@@ -266,11 +266,11 @@ class ZeitEPUBAbo(BasicNewsRecipe):
# browser.follow_link(abolink)
# find page for latest issue
latestlink = browser.find_link(text_regex=re.compile(
-'.*ZUR AKTUELLEN AUSGABE.*'))
+r'.*ZUR AKTUELLEN AUSGABE.*'))
browser.follow_link(latestlink)
# actual cover search
pdflink = browser.find_link(text_regex=re.compile(
-'.*GESAMT-PDF LADEN.*'))
+r'.*GESAMT-PDF LADEN.*'))
cover_url = urlparse(pdflink.base_url)[0] + '://' + urlparse(pdflink.base_url)[1] + '' + (
urlparse(pdflink.url)[2]).replace('ePaper_', '').replace('.pdf', '_001.pdf')
self.log.warning('PDF link found:')

View File

@@ -34,6 +34,7 @@ select = [
# preview rules
'RUF051', 'RUF056', # useless dict operation
'RUF055', # unnecessary regex
+'RUF039', # always use raw-string for regex
]
[lint.per-file-ignores]
@@ -46,7 +47,7 @@ select = [
"src/calibre/gui2/store/stores/*" = ['UP']
"src/calibre/gui2/tts/manager.py" = ['UP037']
"src/calibre/utils/copy_files.py" = ['UP037']
-"src/calibre/utils/smartypants.py" = ['RUF055']
+"src/calibre/utils/smartypants.py" = ['RUF039', 'RUF055']
"src/qt/*.py" = ['I']
"src/qt/*.pyi" = ['I']

View File

@@ -17,7 +17,7 @@ import time
from contextlib import contextmanager
from functools import lru_cache
-iswindows = re.search('win(32|64)', sys.platform)
+iswindows = re.search(r'win(32|64)', sys.platform)
ismacos = 'darwin' in sys.platform
isfreebsd = 'freebsd' in sys.platform
isnetbsd = 'netbsd' in sys.platform

View File

@@ -657,7 +657,7 @@ class Parser(SearchQueryParser): # {{{
if location == 'template':
try:
-template, sep, query = regex.split('#@#:([tdnb]):', query, flags=regex.IGNORECASE)
+template, sep, query = regex.split(r'#@#:([tdnb]):', query, flags=regex.IGNORECASE)
if sep:
sep = sep.lower()
else:

View File

@@ -34,7 +34,7 @@ class CYBOOK(USBMS):
VENDOR_NAME = 'BOOKEEN'
WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_(OPUS|GEN3)__-FD')
-WINDOWS_CARD_A_MEM = re.compile('CYBOOK_(OPUS|GEN3)__-SD')
+WINDOWS_CARD_A_MEM = re.compile(r'CYBOOK_(OPUS|GEN3)__-SD')
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/Cybook')
EBOOK_DIR_MAIN = 'eBooks'
@@ -72,7 +72,7 @@ class ORIZON(CYBOOK):
VENDOR_NAME = ['BOOKEEN', 'LINUX']
WINDOWS_MAIN_MEM = re.compile(r'(CYBOOK_ORIZON__-FD)|(FILE-STOR_GADGET)')
-WINDOWS_CARD_A_MEM = re.compile('(CYBOOK_ORIZON__-SD)|(FILE-STOR_GADGET)')
+WINDOWS_CARD_A_MEM = re.compile(r'(CYBOOK_ORIZON__-SD)|(FILE-STOR_GADGET)')
EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions'

View File

@@ -58,11 +58,11 @@ def build_template_regexp(template):
try:
template = template.rpartition('/')[2]
-return re.compile(re.sub('{([^}]*)}', f, template) + r'([_\d]*$)')
+return re.compile(re.sub(r'{([^}]*)}', f, template) + r'([_\d]*$)')
except:
prints('Failed to parse template: %r'%template)
template = '{title} - {authors}'
-return re.compile(re.sub('{([^}]*)}', f, template) + r'([_\d]*$)')
+return re.compile(re.sub(r'{([^}]*)}', f, template) + r'([_\d]*$)')
def create_upload_path(mdata, fname, template, sanitize,

View File

@@ -239,7 +239,7 @@ def generate_masthead(title, output_path=None, width=600, height=60):
def escape_xpath_attr(value):
if '"' in value:
if "'" in value:
-parts = re.split('("+)', value)
+parts = re.split(r'("+)', value)
ans = []
for x in parts:
if x:

View File

@@ -42,7 +42,7 @@ def _metadata_from_table(soup, searchfor):
# on the home page. cue some nasty special-case hacks...
if re.match(r'^\s*'+searchfor+r'\s*$', td.decode_contents(), flags=re.I):
meta = _detag(td.findNextSibling('td'))
-return re.sub('^:', '', meta).strip()
+return re.sub(r'^:', '', meta).strip()
else:
meta = _detag(td)
return re.sub(r'^[^:]+:', '', meta).strip()
@@ -89,7 +89,7 @@ def _get_comments(soup):
def _get_cover(soup, rdr):
ans = None
try:
-ans = soup.find('img', alt=re.compile('cover', flags=re.I))['src']
+ans = soup.find('img', alt=re.compile(r'cover', flags=re.I))['src']
except TypeError:
# meeehh, no handy alt-tag goodness, try some hackery
# the basic idea behind this is that in general, the cover image

View File

@@ -16,7 +16,7 @@ XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
-_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
+_span_pat = re.compile(r'<span.*?</span>', re.DOTALL|re.IGNORECASE)
LIGATURES = {
# 'Æ': 'AE',
@@ -92,7 +92,7 @@ class DocAnalysis:
elif format == 'pdf':
linere = re.compile(r'(?<=<br>)(?!\s*<br>).*?(?=<br>)', re.DOTALL)
elif format == 'spanned_html':
-linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
+linere = re.compile(r'(?<=<span).*?(?=</span>)', re.DOTALL)
elif format == 'txt':
linere = re.compile('.*?\n')
self.lines = linere.findall(raw)
@@ -430,16 +430,16 @@ def book_designer_rules():
if ans is None:
ans = book_designer_rules.ans = [
# HR
-(re.compile('<hr>', re.IGNORECASE),
+(re.compile(r'<hr>', re.IGNORECASE),
lambda match : '<span style="page-break-after:always"> </span>'),
# Create header tags
(re.compile(r'<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile(r'<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
-(re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+(re.compile(r'<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
-(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+(re.compile(r'<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
]
return ans
@@ -458,7 +458,7 @@ class HTMLPreProcessor:
re.IGNORECASE).search(src) is not None
def is_book_designer(self, raw):
-return re.search('<H2[^><]*id=BookTitle', raw) is not None
+return re.search(r'<H2[^><]*id=BookTitle', raw) is not None
def is_pdftohtml(self, src):
return "<!-- created by calibre's pdftohtml -->" in src[:1000]

View File

@@ -27,7 +27,7 @@ class HeuristicProcessor:
        self.chapters_with_title = 0
        self.blanks_deleted = False
        self.blanks_between_paragraphs = False
-       self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
+       self.linereg = re.compile(r'(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|whitespace)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
        self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
        self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}(?!\s*<h\d)', re.IGNORECASE)

@@ -108,7 +108,7 @@ class HeuristicProcessor:
        inspect. Percent is the minimum percent of line endings which should
        be marked up to return true.
        '''
-       htm_end_ere = re.compile('</(p|div)>', re.DOTALL)
+       htm_end_ere = re.compile(r'</(p|div)>', re.DOTALL)
        line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
        htm_end = htm_end_ere.findall(raw)
        line_end = line_end_ere.findall(raw)

@@ -209,7 +209,7 @@ class HeuristicProcessor:
        typical_chapters = 15000.
        self.min_chapters = int(ceil(wordcount / typical_chapters))
        self.log.debug('minimum chapters required are: '+str(self.min_chapters))
-       heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
+       heading = re.compile(r'<h[1-3][^>]*>', re.IGNORECASE)
        self.html_preprocess_sections = len(heading.findall(html))
        self.log.debug('found ' + str(self.html_preprocess_sections) + ' pre-existing headings')

@@ -299,7 +299,7 @@ class HeuristicProcessor:
                break
        full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
        if n_lookahead_req:
-           n_lookahead = re.sub('(ou|in|cha)', 'lookahead_', full_chapter_line)
+           n_lookahead = re.sub(r'(ou|in|cha)', 'lookahead_', full_chapter_line)
        if not analyze:
            self.log.debug('Marked ' + str(self.html_preprocess_sections) + ' headings, ' + log_message)

@@ -442,7 +442,7 @@ class HeuristicProcessor:
        # Delete microsoft 'smart' tags
        html = re.sub('(?i)</?st1:\\w+>', '', html)
        # Re-open self closing paragraph tags
-       html = re.sub('<p[^>/]*/>', '<p> </p>', html)
+       html = re.sub(r'<p[^>/]*/>', '<p> </p>', html)
        # Get rid of empty span, bold, font, em, & italics tags
        fmt_tags = 'font|[ibu]|em|strong'
        open_fmt_pat, close_fmt_pat = fr'<(?:{fmt_tags})(?:\s[^>]*)?>', f'</(?:{fmt_tags})>'

@@ -462,8 +462,8 @@ class HeuristicProcessor:
        determines the type of html line ending used most commonly in a document
        use before calling docanalysis functions
        '''
-       paras_reg = re.compile('<p[^>]*>', re.IGNORECASE)
-       spans_reg = re.compile('<span[^>]*>', re.IGNORECASE)
+       paras_reg = re.compile(r'<p[^>]*>', re.IGNORECASE)
+       spans_reg = re.compile(r'<span[^>]*>', re.IGNORECASE)
        paras = len(paras_reg.findall(html))
        spans = len(spans_reg.findall(html))
        if spans > 1:

@@ -557,8 +557,8 @@ class HeuristicProcessor:
    def detect_soft_breaks(self, html):
        line = '(?P<initline>'+self.line_open+'\\s*(?P<init_content>.*?)'+self.line_close+')'
-       line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+ \
-                  '\\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
+       line_two = '(?P<line_two>'+re.sub(r'(ou|in|cha)', 'linetwo_', self.line_open)+ \
+                  '\\s*(?P<line_two_content>.*?)'+re.sub(r'(ou|in|cha)', 'linetwo_', self.line_close)+')'
        div_break_candidate_pattern = line+'\\s*<div[^>]*>\\s*</div>\\s*'+line_two
        div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE)

@@ -596,8 +596,8 @@ class HeuristicProcessor:
        All other html is converted to text.
        '''
        hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em; page-break-before:avoid">'
-       if re.findall('(<|>)', replacement_break):
-           if re.match('^<hr', replacement_break):
+       if re.findall(r'(<|>)', replacement_break):
+           if re.match(r'^<hr', replacement_break):
                if replacement_break.find('width') != -1:
                    try:
                        width = int(re.sub('.*?width(:|=)(?P<wnum>\\d+).*', '\\g<wnum>', replacement_break))

@@ -608,11 +608,11 @@ class HeuristicProcessor:
                    else:
                        replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
                    divpercent = (100 - width) // 2
-                   hr_open = re.sub('45', str(divpercent), hr_open)
+                   hr_open = re.sub(r'45', str(divpercent), hr_open)
                    scene_break = hr_open+replacement_break+'</div>'
            else:
                scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
-       elif re.match('^<img', replacement_break):
+       elif re.match(r'^<img', replacement_break):
            scene_break = self.scene_break_open+replacement_break+'</p>'
        else:
            from calibre.utils.html2text import html2text
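
A worked trace of the width handling above, with illustrative values that are not from the commit:

    import re

    hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em; page-break-before:avoid">'
    replacement_break = '<hr width=50 />'
    width = int(re.sub('.*?width(:|=)(?P<wnum>\\d+).*', '\\g<wnum>', replacement_break))  # 50
    divpercent = (100 - width) // 2                                                      # 25
    hr_open = re.sub(r'45', str(divpercent), hr_open)  # both 45% margins become 25%

Note that r'45' and '45' compile to the same pattern; the surrounding try/except is what catches quoted widths such as width="50%", where the pattern finds no digits and int() raises ValueError.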
@@ -638,7 +638,7 @@ class HeuristicProcessor:
        empty_paragraph = '\n<p> </p>\n'
        self.in_blockquote = False
        self.previous_was_paragraph = False
-       html = re.sub('</?a[^>]*>', '', html)
+       html = re.sub(r'</?a[^>]*>', '', html)

        def convert_styles(match):
            # print('raw styles are: '+match.group('styles'))

View File

@@ -91,7 +91,7 @@ class HTMLFile:
    HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
    HTML_PAT_BIN = re.compile(br'<\s*html', re.IGNORECASE)
-   TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
+   TITLE_PAT = re.compile(r'<title>([^<>]+)</title>', re.IGNORECASE)
    LINK_PAT = re.compile(
        r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
        re.DOTALL|re.IGNORECASE)

View File

@@ -269,7 +269,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
            tag = 'div'
        # Add page-break-brefore: always because renders typically treat a new file (we're merging files)
        # as a page break and remove all other page break types that might be set.
-       style_a = 'page-break-before: always; %s' % re.sub('page-break-[^:]+:[^;]+;?', '', style_a)
+       style_a = 'page-break-before: always; %s' % re.sub(r'page-break-[^:]+:[^;]+;?', '', style_a)
        # Remove unnecessary spaces.
        style_a = re.sub(r'\s{2,}', ' ', style_a).strip()
        tags.append(tag)

View File

@@ -34,8 +34,8 @@ class Hyphenator:
    def _insert_pattern(self, pattern):
        # Convert a pattern like 'a1bc3d4' into a string of chars 'abcd'
        # and a list of points [ 1, 0, 3, 4 ].
-       chars = re.sub('[0-9]', '', pattern)
-       points = [int(d or 0) for d in re.split('[.a-z]', pattern)]
+       chars = re.sub(r'[0-9]', '', pattern)
+       points = [int(d or 0) for d in re.split(r'[.a-z]', pattern)]
        # Insert the pattern into the tree. Each character finds a dict
        # another level down in the tree, and leaf nodes have the list of
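
Both patterns are plain character classes, so the prefix is again cosmetic. A worked example of the conversion the comment describes (illustrative):

    import re

    pattern = 'a1bc3d4'
    chars = re.sub(r'[0-9]', '', pattern)                         # 'abcd'
    points = [int(d or 0) for d in re.split(r'[.a-z]', pattern)]  # [0, 1, 0, 3, 4]

(The split yields a leading empty field before the first letter, hence the extra 0 at the front.)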

View File

@@ -163,7 +163,7 @@ class HTMLConverter:
    # Fix Book Designer markup
    BOOK_DESIGNER = [
        # HR
-       (re.compile('<hr>', re.IGNORECASE),
+       (re.compile(r'<hr>', re.IGNORECASE),
         lambda match : '<span style="page-break-after:always"> </span>'),
        # Create header tags
        (re.compile(r'<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),

@@ -279,7 +279,7 @@ class HTMLConverter:
        if isinstance(src, bytes):
            src = src.decode('utf-8', 'replace')
        match = self.PAGE_BREAK_PAT.search(src)
-       if match and not re.match('avoid', match.group(1), re.IGNORECASE):
+       if match and not re.match(r'avoid', match.group(1), re.IGNORECASE):
            self.page_break_found = True
        ncss, npcss = self.parse_css(src)
        if ncss:

@@ -324,10 +324,10 @@ class HTMLConverter:
    def is_baen(self, soup):
        return bool(soup.find('meta', attrs={'name':'Publisher',
-                   'content':re.compile('Baen', re.IGNORECASE)}))
+                   'content':re.compile(r'Baen', re.IGNORECASE)}))

    def is_book_designer(self, raw):
-       return bool(re.search('<H2[^><]*id=BookTitle', raw))
+       return bool(re.search(r'<H2[^><]*id=BookTitle', raw))

    def preprocess(self, raw):
        nmassage = []

@@ -1152,7 +1152,7 @@ class HTMLConverter:
        def font_weight(val):
            ans = 0
-           m = re.search('([0-9]+)', val)
+           m = re.search(r'([0-9]+)', val)
            if m:
                ans = int(m.group(1))
            elif val.find('bold') >= 0 or val.find('strong') >= 0:

@@ -1544,7 +1544,7 @@ class HTMLConverter:
            with open(path, 'rb') as f:
                src = f.read().decode('utf-8', 'replace')
            match = self.PAGE_BREAK_PAT.search(src)
-           if match and not re.match('avoid', match.group(1), re.IGNORECASE):
+           if match and not re.match(r'avoid', match.group(1), re.IGNORECASE):
                self.page_break_found = True
            ncss, npcss = self.parse_css(src)
        except OSError:

@@ -1869,11 +1869,11 @@ def process_file(path, options, logger):
        header.append(fheader + ' ')
    book, fonts = Book(options, logger, header=header, **args)
    le = re.compile(options.link_exclude) if options.link_exclude else \
-        re.compile('$')
+        re.compile(r'$')
    pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
-        re.compile('$')
+        re.compile(r'$')
    fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
-        re.compile('$')
+        re.compile(r'$')
    cq = options.chapter_attr.split(',')
    if len(cq) < 3:
        raise ValueError('The --chapter-attr setting must have 2 commas.')

View File

@@ -213,7 +213,7 @@ class Row:
    def __init__(self, conv, row, css, colpad):
        self.cells = []
        self.colpad = colpad
-       cells = row.findAll(re.compile('td|th', re.IGNORECASE))
+       cells = row.findAll(re.compile(r'td|th', re.IGNORECASE))
        self.targets = []
        for cell in cells:
            ccss = conv.tag_css(cell, css)[0]

View File

@@ -172,7 +172,7 @@ def get_title_sort_pat(lang=None):
    except:
        ans = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
    else:
-       ans = re.compile('^$')  # matches only the empty string
+       ans = re.compile(r'^$')  # matches only the empty string
    _title_pats[lang] = ans
    return ans
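
For context (illustrative, not part of the commit): the fallback pattern strips leading articles, while r'^$' deliberately matches nothing in any non-empty title:

    import re

    pat = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
    print(pat.sub('', 'The Time Machine'))                # 'Time Machine'
    print(re.compile(r'^$').sub('', 'The Time Machine'))  # unchanged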

View File

@@ -139,7 +139,7 @@ def metadata_from_filename(name, pat=None, fallback_pat=None):
    try:
        pat = regex.compile(prefs.get('filename_pattern'), flags=regex.UNICODE | regex.VERSION0 | regex.FULLCASE)
    except Exception:
-       pat = regex.compile('(?P<title>.+) - (?P<author>[^_]+)', flags=regex.UNICODE | regex.VERSION0 | regex.FULLCASE)
+       pat = regex.compile(r'(?P<title>.+) - (?P<author>[^_]+)', flags=regex.UNICODE | regex.VERSION0 | regex.FULLCASE)
    name = name.replace('_', ' ')
    match = pat.search(name)

View File

@@ -59,4 +59,4 @@ def set_metadata(stream, mi):
    MetadataWriter(stream, mi)
    stream.seek(0)
-   stream.write(re.sub('[^-A-Za-z0-9 ]+', '_', mi.title).ljust(31, '\x00')[:31].encode('ascii', 'replace') + b'\x00')
+   stream.write(re.sub(r'[^-A-Za-z0-9 ]+', '_', mi.title).ljust(31, '\x00')[:31].encode('ascii', 'replace') + b'\x00')

View File

@@ -365,7 +365,7 @@ class Worker(Thread):  # Get details {{{
            r'([0-9.,]+) ?(out of|von|van|su|étoiles sur|つ星のうち|de un máximo de|de|av) '
            r'([\d\.]+)( (stars|Sternen|stelle|estrellas|estrelas|sterren|stjärnor)){0,1}'
        )
-       self.ratings_pat_cn = re.compile('([0-9.]+) 颗星,最多 5 颗星')
+       self.ratings_pat_cn = re.compile(r'([0-9.]+) 颗星,最多 5 颗星')
        self.ratings_pat_jp = re.compile(r'\d+つ星のうち([\d\.]+)')
        lm = {

View File

@@ -165,7 +165,7 @@ def wayback_url_processor(url):
    if url.startswith('/'):
        # Use original URL instead of absolutizing to wayback URL as wayback is
        # slow
-       m = re.search('https?:', url)
+       m = re.search(r'https?:', url)
        if m is None:
            url = 'https://web.archive.org' + url
    else:

View File

@@ -380,7 +380,7 @@ class MobiReader:
        self.processed_html = re.sub(
            r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', r'\g<blockquote>'+r'\g<para>', self.processed_html)
        bods = htmls = 0
-       for x in re.finditer('</body>|</html>', self.processed_html):
+       for x in re.finditer(r'</body>|</html>', self.processed_html):
            if x == '</body>':
                bods +=1
            else:

View File

@@ -155,7 +155,7 @@ def hfix(name, raw):
    return raw

-CLI_HELP = {x:hfix(x, re.sub('<.*?>', '', y)) for x, y in iteritems(HELP)}
+CLI_HELP = {x:hfix(x, re.sub(r'<.*?>', '', y)) for x, y in iteritems(HELP)}
# }}}

View File

@@ -36,7 +36,7 @@ class Patterns:
        # French words with prefixes are reduced to the stem word, so that the
        # words appear only once in the word list
        self.fr_elision_pat = regex.compile(
-           "^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)[']", flags=regex.UNICODE | regex.VERSION1 | regex.IGNORECASE)
+           r"^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)[']", flags=regex.UNICODE | regex.VERSION1 | regex.IGNORECASE)

def patterns():
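
A quick illustration of what this pattern does (not part of the commit); it contains no backslashes, so the raw prefix is stylistic here too:

    import regex

    pat = regex.compile(r"^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)[']",
                        flags=regex.UNICODE | regex.VERSION1 | regex.IGNORECASE)
    print(pat.sub('', "l'homme"))  # homme
    print(pat.sub('', "qu'il"))    # il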

View File

@@ -102,7 +102,7 @@ class SVGRasterizer:
        if view_box is not None:
            try:
-               box = [float(x) for x in filter(None, re.split('[, ]', view_box))]
+               box = [float(x) for x in filter(None, re.split(r'[, ]', view_box))]
                sizes = [box[2]-box[0], box[3] - box[1]]
            except (TypeError, ValueError, IndexError):
                logger.warn('SVG image has invalid viewBox="%s", ignoring the viewBox' % view_box)

View File

@@ -152,7 +152,7 @@ def flip_image(img, flip):

def flip_images(raw):
-   for match in re.finditer('<IMG[^>]+/?>', raw, flags=re.I):
+   for match in re.finditer(r'<IMG[^>]+/?>', raw, flags=re.I):
        img = match.group()
        m = re.search(r'class="(x|y|xy)flip"', img)
        if m is None:

@@ -174,5 +174,5 @@ def flip_images(raw):
        counter += 1
        return m.group(1).rstrip('/') + f' alt="Image {counter}"/>'
-   raw = re.sub('(<IMG[^>]+)/?>', add_alt, raw, flags=re.I)
+   raw = re.sub(r'(<IMG[^>]+)/?>', add_alt, raw, flags=re.I)
    return raw

View File

@@ -121,7 +121,7 @@ class Font:
        self.metrics, self.compress = metrics, compress
        self.is_otf = self.metrics.is_otf
        self.subset_tag = str(
-           re.sub('.', lambda m: codepoint_to_chr(int(m.group())+ord('A')), oct(num).replace('o', '')
+           re.sub(r'.', lambda m: codepoint_to_chr(int(m.group())+ord('A')), oct(num).replace('o', '')
            )).rjust(6, 'A')
        self.font_stream = FontStream(metrics.is_otf, compress=compress)
        try:
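
What the subset_tag expression computes, traced with a small value (illustrative; plain chr stands in for calibre's codepoint_to_chr):

    import re

    num = 5
    digits = oct(num).replace('o', '')  # '05'
    tag = str(re.sub(r'.', lambda m: chr(int(m.group()) + ord('A')), digits)).rjust(6, 'A')
    print(tag)  # 'AAAAAF' -- a six-letter tag of the kind PDF font subsetting expects

Here r'.' matches each octal digit in turn and maps it to an uppercase letter; with or without the prefix the pattern is the same single dot.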

View File

@@ -199,11 +199,11 @@ class PMLMLizer:
        text = re.sub('[^\x00-\x7f]', lambda x: unipmlcode(x.group()), text)

        # Remove excess spaces at beginning and end of lines
-       text = re.sub('(?m)^[ ]+', '', text)
-       text = re.sub('(?m)[ ]+$', '', text)
+       text = re.sub(r'(?m)^[ ]+', '', text)
+       text = re.sub(r'(?m)[ ]+$', '', text)

        # Remove excessive spaces
-       text = re.sub('[ ]{2,}', ' ', text)
+       text = re.sub(r'[ ]{2,}', ' ', text)

        # Condense excessive \c empty line sequences.
        text = re.sub(r'(\\c\s*\\c\s*){2,}', r'\\c \n\\c\n', text)

@@ -213,7 +213,7 @@ class PMLMLizer:
        if self.opts.remove_paragraph_spacing:
            text = re.sub('\n{2,}', '\n', text)
            # Only indent lines that don't have special formatting
-           text = re.sub('(?imu)^(?P<text>.+)$', lambda mo: mo.group('text')
+           text = re.sub(r'(?imu)^(?P<text>.+)$', lambda mo: mo.group('text')
                if re.search(r'\\[XxCmrctTp]', mo.group('text')) else ' %s' % mo.group('text'), text)
        else:
            text = re.sub('\n{3,}', '\n\n', text)

View File

@@ -19,11 +19,11 @@ def tounicode(tree_or_node, **kwargs):

REGEXES = {
-   'unlikelyCandidatesRe': re.compile('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter',re.I),  # noqa: E501
+   'unlikelyCandidatesRe': re.compile(r'combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter',re.I),  # noqa: E501
-   'okMaybeItsACandidateRe': re.compile('and|article|body|column|main|shadow',re.I),
+   'okMaybeItsACandidateRe': re.compile(r'and|article|body|column|main|shadow',re.I),
-   'positiveRe': re.compile('article|body|content|entry|hentry|main|page|pagination|post|text|blog|story',re.I),
+   'positiveRe': re.compile(r'article|body|content|entry|hentry|main|page|pagination|post|text|blog|story',re.I),
-   'negativeRe': re.compile('combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget',re.I),  # noqa: E501
+   'negativeRe': re.compile(r'combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget',re.I),  # noqa: E501
-   'divToPElementsRe': re.compile('<(a|blockquote|dl|div|img|ol|p|pre|table|ul)',re.I),
+   'divToPElementsRe': re.compile(r'<(a|blockquote|dl|div|img|ol|p|pre|table|ul)',re.I),
    # 'replaceBrsRe': re.compile('(<br[^>]*>[ \n\r\t]*){2,}',re.I),
    # 'replaceFontsRe': re.compile('<(\/?)font[^>]*>',re.I),
    # 'trimRe': re.compile('^\s+|\s+$/'),

View File

@@ -121,7 +121,7 @@ class RTFMLizer:
        self.log.debug('Converting %s to RTF markup...' % item.href)
        # Removing comments is needed as comments with -- inside them can
        # cause fromstring() to fail
-       content = re.sub('<!--.*?-->', '', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL)
+       content = re.sub(r'<!--.*?-->', '', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL)
        content = self.remove_newlines(content)
        content = self.remove_tabs(content)
        content = safe_xml_fromstring(content)

@@ -198,7 +198,7 @@ class RTFMLizer:
        text = re.sub('%s{3,}' % os.linesep, f'{os.linesep}{os.linesep}', text)
        # Remove excessive spaces
-       text = re.sub('[ ]{2,}', ' ', text)
+       text = re.sub(r'[ ]{2,}', ' ', text)
        text = re.sub('\t{2,}', '\t', text)
        text = text.replace('\t ', '\t')

View File

@@ -652,7 +652,7 @@ class ProcessTokens:
        return f'cw<{pre}<{token}<nu<{type}\n'

    def __language_func(self, pre, token, num):
-       lang_name = self.__language_dict.get(int(re.search('[0-9]+', num).group()))
+       lang_name = self.__language_dict.get(int(re.search(r'[0-9]+', num).group()))
        if not lang_name:
            lang_name = 'not defined'
        if self.__run_level > 3:

View File

@@ -165,13 +165,13 @@ class SNBMLizer:
        text = re.sub('\n[ ]+\n', '\n\n', text)
        if self.opts.remove_paragraph_spacing:
            text = re.sub('\n{2,}', '\n', text)
-           text = re.sub('(?imu)^(?=.)', '\t', text)
+           text = re.sub(r'(?imu)^(?=.)', '\t', text)
        else:
            text = re.sub('\n{3,}', '\n\n', text)

        # Replace spaces at the beginning and end of lines
-       text = re.sub('(?imu)^[ ]+', '', text)
-       text = re.sub('(?imu)[ ]+$', '', text)
+       text = re.sub(r'(?imu)^[ ]+', '', text)
+       text = re.sub(r'(?imu)[ ]+$', '', text)

        if self.opts.snb_max_line_length:
            max_length = self.opts.snb_max_line_length

View File

@@ -6,117 +6,117 @@ import re

def unsmarten(txt):
-   txt = re.sub('&#162;|&cent;|¢', r'{c\}', txt)  # cent
+   txt = re.sub(r'&#162;|&cent;|¢', r'{c\}', txt)  # cent
-   txt = re.sub('&#163;|&pound;|£', r'{L-}', txt)  # pound
+   txt = re.sub(r'&#163;|&pound;|£', r'{L-}', txt)  # pound
-   txt = re.sub('&#165;|&yen;|¥', r'{Y=}', txt)  # yen
+   txt = re.sub(r'&#165;|&yen;|¥', r'{Y=}', txt)  # yen
-   txt = re.sub('&#169;|&copy;|©', r'{(c)}', txt)  # copyright
+   txt = re.sub(r'&#169;|&copy;|©', r'{(c)}', txt)  # copyright
-   txt = re.sub('&#174;|&reg;|®', r'{(r)}', txt)  # registered
+   txt = re.sub(r'&#174;|&reg;|®', r'{(r)}', txt)  # registered
-   txt = re.sub('&#188;|&frac14;|¼', r'{1/4}', txt)  # quarter
+   txt = re.sub(r'&#188;|&frac14;|¼', r'{1/4}', txt)  # quarter
-   txt = re.sub('&#189;|&frac12;|½', r'{1/2}', txt)  # half
+   txt = re.sub(r'&#189;|&frac12;|½', r'{1/2}', txt)  # half
-   txt = re.sub('&#190;|&frac34;|¾', r'{3/4}', txt)  # three-quarter
+   txt = re.sub(r'&#190;|&frac34;|¾', r'{3/4}', txt)  # three-quarter
-   txt = re.sub('&#192;|&Agrave;|À', r'{A`)}', txt)  # A-grave
+   txt = re.sub(r'&#192;|&Agrave;|À', r'{A`)}', txt)  # A-grave
-   txt = re.sub('&#193;|&Aacute;|Á', r"{A'}", txt)  # A-acute
+   txt = re.sub(r'&#193;|&Aacute;|Á', r"{A'}", txt)  # A-acute
-   txt = re.sub('&#194;|&Acirc;|Â', r'{A^}', txt)  # A-circumflex
+   txt = re.sub(r'&#194;|&Acirc;|Â', r'{A^}', txt)  # A-circumflex
-   txt = re.sub('&#195;|&Atilde;|Ã', r'{A~}', txt)  # A-tilde
+   txt = re.sub(r'&#195;|&Atilde;|Ã', r'{A~}', txt)  # A-tilde
-   txt = re.sub('&#196;|&Auml;|Ä', r'{A"}', txt)  # A-umlaut
+   txt = re.sub(r'&#196;|&Auml;|Ä', r'{A"}', txt)  # A-umlaut
-   txt = re.sub('&#197;|&Aring;|Å', r'{Ao}', txt)  # A-ring
+   txt = re.sub(r'&#197;|&Aring;|Å', r'{Ao}', txt)  # A-ring
-   txt = re.sub('&#198;|&AElig;|Æ', r'{AE}', txt)  # AE
+   txt = re.sub(r'&#198;|&AElig;|Æ', r'{AE}', txt)  # AE
-   txt = re.sub('&#199;|&Ccedil;|Ç', r'{C,}', txt)  # C-cedilla
+   txt = re.sub(r'&#199;|&Ccedil;|Ç', r'{C,}', txt)  # C-cedilla
-   txt = re.sub('&#200;|&Egrave;|È', r'{E`}', txt)  # E-grave
+   txt = re.sub(r'&#200;|&Egrave;|È', r'{E`}', txt)  # E-grave
-   txt = re.sub('&#201;|&Eacute;|É', r"{E'}", txt)  # E-acute
+   txt = re.sub(r'&#201;|&Eacute;|É', r"{E'}", txt)  # E-acute
-   txt = re.sub('&#202;|&Ecirc;|Ê', r'{E^}', txt)  # E-circumflex
+   txt = re.sub(r'&#202;|&Ecirc;|Ê', r'{E^}', txt)  # E-circumflex
-   txt = re.sub('&#203;|&Euml;|Ë', r'{E"}', txt)  # E-umlaut
+   txt = re.sub(r'&#203;|&Euml;|Ë', r'{E"}', txt)  # E-umlaut
-   txt = re.sub('&#204;|&Igrave;|Ì', r'{I`}', txt)  # I-grave
+   txt = re.sub(r'&#204;|&Igrave;|Ì', r'{I`}', txt)  # I-grave
-   txt = re.sub('&#205;|&Iacute;|Í', r"{I'}", txt)  # I-acute
+   txt = re.sub(r'&#205;|&Iacute;|Í', r"{I'}", txt)  # I-acute
-   txt = re.sub('&#206;|&Icirc;|Î', r'{I^}', txt)  # I-circumflex
+   txt = re.sub(r'&#206;|&Icirc;|Î', r'{I^}', txt)  # I-circumflex
-   txt = re.sub('&#207;|&Iuml;|Ï', r'{I"}', txt)  # I-umlaut
+   txt = re.sub(r'&#207;|&Iuml;|Ï', r'{I"}', txt)  # I-umlaut
-   txt = re.sub('&#208;|&ETH;|Ð', r'{D-}', txt)  # ETH
+   txt = re.sub(r'&#208;|&ETH;|Ð', r'{D-}', txt)  # ETH
-   txt = re.sub('&#209;|&Ntilde;|Ñ', r'{N~}', txt)  # N-tilde
+   txt = re.sub(r'&#209;|&Ntilde;|Ñ', r'{N~}', txt)  # N-tilde
-   txt = re.sub('&#210;|&Ograve;|Ò', r'{O`}', txt)  # O-grave
+   txt = re.sub(r'&#210;|&Ograve;|Ò', r'{O`}', txt)  # O-grave
-   txt = re.sub('&#211;|&Oacute;|Ó', r"{O'}", txt)  # O-acute
+   txt = re.sub(r'&#211;|&Oacute;|Ó', r"{O'}", txt)  # O-acute
-   txt = re.sub('&#212;|&Ocirc;|Ô', r'{O^}', txt)  # O-circumflex
+   txt = re.sub(r'&#212;|&Ocirc;|Ô', r'{O^}', txt)  # O-circumflex
-   txt = re.sub('&#213;|&Otilde;|Õ', r'{O~}', txt)  # O-tilde
+   txt = re.sub(r'&#213;|&Otilde;|Õ', r'{O~}', txt)  # O-tilde
-   txt = re.sub('&#214;|&Ouml;|Ö', r'{O"}', txt)  # O-umlaut
+   txt = re.sub(r'&#214;|&Ouml;|Ö', r'{O"}', txt)  # O-umlaut
-   txt = re.sub('&#215;|&times;|×', r'{x}', txt)  # dimension
+   txt = re.sub(r'&#215;|&times;|×', r'{x}', txt)  # dimension
-   txt = re.sub('&#216;|&Oslash;|Ø', r'{O/}', txt)  # O-slash
+   txt = re.sub(r'&#216;|&Oslash;|Ø', r'{O/}', txt)  # O-slash
-   txt = re.sub('&#217;|&Ugrave;|Ù', r'{U`}', txt)  # U-grave
+   txt = re.sub(r'&#217;|&Ugrave;|Ù', r'{U`}', txt)  # U-grave
-   txt = re.sub('&#218;|&Uacute;|Ú', r"{U'}", txt)  # U-acute
+   txt = re.sub(r'&#218;|&Uacute;|Ú', r"{U'}", txt)  # U-acute
-   txt = re.sub('&#219;|&Ucirc;|Û', r'{U^}', txt)  # U-circumflex
+   txt = re.sub(r'&#219;|&Ucirc;|Û', r'{U^}', txt)  # U-circumflex
-   txt = re.sub('&#220;|&Uuml;|Ü', r'{U"}', txt)  # U-umlaut
+   txt = re.sub(r'&#220;|&Uuml;|Ü', r'{U"}', txt)  # U-umlaut
-   txt = re.sub('&#221;|&Yacute;|Ý', r"{Y'}", txt)  # Y-grave
+   txt = re.sub(r'&#221;|&Yacute;|Ý', r"{Y'}", txt)  # Y-grave
-   txt = re.sub('&#223;|&szlig;|ß', r'{sz}', txt)  # sharp-s
+   txt = re.sub(r'&#223;|&szlig;|ß', r'{sz}', txt)  # sharp-s
-   txt = re.sub('&#224;|&agrave;|à', r'{a`}', txt)  # a-grave
+   txt = re.sub(r'&#224;|&agrave;|à', r'{a`}', txt)  # a-grave
-   txt = re.sub('&#225;|&aacute;|á', r"{a'}", txt)  # a-acute
+   txt = re.sub(r'&#225;|&aacute;|á', r"{a'}", txt)  # a-acute
-   txt = re.sub('&#226;|&acirc;|â', r'{a^}', txt)  # a-circumflex
+   txt = re.sub(r'&#226;|&acirc;|â', r'{a^}', txt)  # a-circumflex
-   txt = re.sub('&#227;|&atilde;|ã', r'{a~}', txt)  # a-tilde
+   txt = re.sub(r'&#227;|&atilde;|ã', r'{a~}', txt)  # a-tilde
-   txt = re.sub('&#228;|&auml;|ä', r'{a"}', txt)  # a-umlaut
+   txt = re.sub(r'&#228;|&auml;|ä', r'{a"}', txt)  # a-umlaut
-   txt = re.sub('&#229;|&aring;|å', r'{ao}', txt)  # a-ring
+   txt = re.sub(r'&#229;|&aring;|å', r'{ao}', txt)  # a-ring
-   txt = re.sub('&#230;|&aelig;|æ', r'{ae}', txt)  # ae
+   txt = re.sub(r'&#230;|&aelig;|æ', r'{ae}', txt)  # ae
-   txt = re.sub('&#231;|&ccedil;|ç', r'{c,}', txt)  # c-cedilla
+   txt = re.sub(r'&#231;|&ccedil;|ç', r'{c,}', txt)  # c-cedilla
-   txt = re.sub('&#232;|&egrave;|è', r'{e`}', txt)  # e-grave
+   txt = re.sub(r'&#232;|&egrave;|è', r'{e`}', txt)  # e-grave
-   txt = re.sub('&#233;|&eacute;|é', r"{e'}", txt)  # e-acute
+   txt = re.sub(r'&#233;|&eacute;|é', r"{e'}", txt)  # e-acute
-   txt = re.sub('&#234;|&ecirc;|ê', r'{e^}', txt)  # e-circumflex
+   txt = re.sub(r'&#234;|&ecirc;|ê', r'{e^}', txt)  # e-circumflex
-   txt = re.sub('&#235;|&euml;|ë', r'{e"}', txt)  # e-umlaut
+   txt = re.sub(r'&#235;|&euml;|ë', r'{e"}', txt)  # e-umlaut
-   txt = re.sub('&#236;|&igrave;|ì', r'{i`}', txt)  # i-grave
+   txt = re.sub(r'&#236;|&igrave;|ì', r'{i`}', txt)  # i-grave
-   txt = re.sub('&#237;|&iacute;|í', r"{i'}", txt)  # i-acute
+   txt = re.sub(r'&#237;|&iacute;|í', r"{i'}", txt)  # i-acute
-   txt = re.sub('&#238;|&icirc;|î', r'{i^}', txt)  # i-circumflex
+   txt = re.sub(r'&#238;|&icirc;|î', r'{i^}', txt)  # i-circumflex
-   txt = re.sub('&#239;|&iuml;|ï', r'{i"}', txt)  # i-umlaut
+   txt = re.sub(r'&#239;|&iuml;|ï', r'{i"}', txt)  # i-umlaut
-   txt = re.sub('&#240;|&eth;|ð', r'{d-}', txt)  # eth
+   txt = re.sub(r'&#240;|&eth;|ð', r'{d-}', txt)  # eth
-   txt = re.sub('&#241;|&ntilde;|ñ', r'{n~}', txt)  # n-tilde
+   txt = re.sub(r'&#241;|&ntilde;|ñ', r'{n~}', txt)  # n-tilde
-   txt = re.sub('&#242;|&ograve;|ò', r'{o`}', txt)  # o-grave
+   txt = re.sub(r'&#242;|&ograve;|ò', r'{o`}', txt)  # o-grave
-   txt = re.sub('&#243;|&oacute;|ó', r"{o'}", txt)  # o-acute
+   txt = re.sub(r'&#243;|&oacute;|ó', r"{o'}", txt)  # o-acute
-   txt = re.sub('&#244;|&ocirc;|ô', r'{o^}', txt)  # o-circumflex
+   txt = re.sub(r'&#244;|&ocirc;|ô', r'{o^}', txt)  # o-circumflex
-   txt = re.sub('&#245;|&otilde;|õ', r'{o~}', txt)  # o-tilde
+   txt = re.sub(r'&#245;|&otilde;|õ', r'{o~}', txt)  # o-tilde
-   txt = re.sub('&#246;|&ouml;|ö', r'{o"}', txt)  # o-umlaut
+   txt = re.sub(r'&#246;|&ouml;|ö', r'{o"}', txt)  # o-umlaut
-   txt = re.sub('&#248;|&oslash;|ø', r'{o/}', txt)  # o-stroke
+   txt = re.sub(r'&#248;|&oslash;|ø', r'{o/}', txt)  # o-stroke
-   txt = re.sub('&#249;|&ugrave;|ù', r'{u`}', txt)  # u-grave
+   txt = re.sub(r'&#249;|&ugrave;|ù', r'{u`}', txt)  # u-grave
-   txt = re.sub('&#250;|&uacute;|ú', r"{u'}", txt)  # u-acute
+   txt = re.sub(r'&#250;|&uacute;|ú', r"{u'}", txt)  # u-acute
-   txt = re.sub('&#251;|&ucirc;|û', r'{u^}', txt)  # u-circumflex
+   txt = re.sub(r'&#251;|&ucirc;|û', r'{u^}', txt)  # u-circumflex
-   txt = re.sub('&#252;|&uuml;|ü', r'{u"}', txt)  # u-umlaut
+   txt = re.sub(r'&#252;|&uuml;|ü', r'{u"}', txt)  # u-umlaut
-   txt = re.sub('&#253;|&yacute;|ý', r"{y'}", txt)  # y-acute
+   txt = re.sub(r'&#253;|&yacute;|ý', r"{y'}", txt)  # y-acute
-   txt = re.sub('&#255;|&yuml;|ÿ', r'{y"}', txt)  # y-umlaut
+   txt = re.sub(r'&#255;|&yuml;|ÿ', r'{y"}', txt)  # y-umlaut
-   txt = re.sub('&#268;|&Ccaron;|Č', r'{Cˇ}', txt)  # C-caron
+   txt = re.sub(r'&#268;|&Ccaron;|Č', r'{Cˇ}', txt)  # C-caron
-   txt = re.sub('&#269;|&ccaron;|č', r'{cˇ}', txt)  # c-caron
+   txt = re.sub(r'&#269;|&ccaron;|č', r'{cˇ}', txt)  # c-caron
-   txt = re.sub('&#270;|&Dcaron;|Ď', r'{Dˇ}', txt)  # D-caron
+   txt = re.sub(r'&#270;|&Dcaron;|Ď', r'{Dˇ}', txt)  # D-caron
-   txt = re.sub('&#271;|&dcaron;|ď', r'{dˇ}', txt)  # d-caron
+   txt = re.sub(r'&#271;|&dcaron;|ď', r'{dˇ}', txt)  # d-caron
-   txt = re.sub('&#282;|&Ecaron;|Ě', r'{Eˇ}', txt)  # E-caron
+   txt = re.sub(r'&#282;|&Ecaron;|Ě', r'{Eˇ}', txt)  # E-caron
-   txt = re.sub('&#283;|&ecaron;|ě', r'{eˇ}', txt)  # e-caron
+   txt = re.sub(r'&#283;|&ecaron;|ě', r'{eˇ}', txt)  # e-caron
-   txt = re.sub('&#313;|&Lacute;|Ĺ', r"{L'}", txt)  # L-acute
+   txt = re.sub(r'&#313;|&Lacute;|Ĺ', r"{L'}", txt)  # L-acute
-   txt = re.sub('&#314;|&lacute;|ĺ', r"{l'}", txt)  # l-acute
+   txt = re.sub(r'&#314;|&lacute;|ĺ', r"{l'}", txt)  # l-acute
-   txt = re.sub('&#317;|&Lcaron;|Ľ', r'{Lˇ}', txt)  # L-caron
+   txt = re.sub(r'&#317;|&Lcaron;|Ľ', r'{Lˇ}', txt)  # L-caron
-   txt = re.sub('&#318;|&lcaron;|ľ', r'{lˇ}', txt)  # l-caron
+   txt = re.sub(r'&#318;|&lcaron;|ľ', r'{lˇ}', txt)  # l-caron
-   txt = re.sub('&#327;|&Ncaron;|Ň', r'{Nˇ}', txt)  # N-caron
+   txt = re.sub(r'&#327;|&Ncaron;|Ň', r'{Nˇ}', txt)  # N-caron
-   txt = re.sub('&#328;|&ncaron;|ň', r'{nˇ}', txt)  # n-caron
+   txt = re.sub(r'&#328;|&ncaron;|ň', r'{nˇ}', txt)  # n-caron
-   txt = re.sub('&#338;|&OElig;|Œ', r'{OE}', txt)  # OE
+   txt = re.sub(r'&#338;|&OElig;|Œ', r'{OE}', txt)  # OE
-   txt = re.sub('&#339;|&oelig;|œ', r'{oe}', txt)  # oe
+   txt = re.sub(r'&#339;|&oelig;|œ', r'{oe}', txt)  # oe
-   txt = re.sub('&#340;|&Racute;|Ŕ', r"{R'}", txt)  # R-acute
+   txt = re.sub(r'&#340;|&Racute;|Ŕ', r"{R'}", txt)  # R-acute
-   txt = re.sub('&#341;|&racute;|ŕ', r"{r'}", txt)  # r-acute
+   txt = re.sub(r'&#341;|&racute;|ŕ', r"{r'}", txt)  # r-acute
-   txt = re.sub('&#344;|&Rcaron;|Ř', r'{Rˇ}', txt)  # R-caron
+   txt = re.sub(r'&#344;|&Rcaron;|Ř', r'{Rˇ}', txt)  # R-caron
-   txt = re.sub('&#345;|&rcaron;|ř', r'{rˇ}', txt)  # r-caron
+   txt = re.sub(r'&#345;|&rcaron;|ř', r'{rˇ}', txt)  # r-caron
-   txt = re.sub('&#348;|Ŝ', r'{S^}', txt)  # S-circumflex
+   txt = re.sub(r'&#348;|Ŝ', r'{S^}', txt)  # S-circumflex
-   txt = re.sub('&#349;|ŝ', r'{s^}', txt)  # s-circumflex
+   txt = re.sub(r'&#349;|ŝ', r'{s^}', txt)  # s-circumflex
-   txt = re.sub('&#352;|&Scaron;|Š', r'{Sˇ}', txt)  # S-caron
+   txt = re.sub(r'&#352;|&Scaron;|Š', r'{Sˇ}', txt)  # S-caron
-   txt = re.sub('&#353;|&scaron;|š', r'{sˇ}', txt)  # s-caron
+   txt = re.sub(r'&#353;|&scaron;|š', r'{sˇ}', txt)  # s-caron
-   txt = re.sub('&#356;|&Tcaron;|Ť', r'{Tˇ}', txt)  # T-caron
+   txt = re.sub(r'&#356;|&Tcaron;|Ť', r'{Tˇ}', txt)  # T-caron
-   txt = re.sub('&#357;|&tcaron;|ť', r'{tˇ}', txt)  # t-caron
+   txt = re.sub(r'&#357;|&tcaron;|ť', r'{tˇ}', txt)  # t-caron
-   txt = re.sub('&#366;|&Uring;|Ů', r'{U°}', txt)  # U-ring
+   txt = re.sub(r'&#366;|&Uring;|Ů', r'{U°}', txt)  # U-ring
-   txt = re.sub('&#367;|&uring;|ů', r'{u°}', txt)  # u-ring
+   txt = re.sub(r'&#367;|&uring;|ů', r'{u°}', txt)  # u-ring
-   txt = re.sub('&#381;|&Zcaron;|Ž', r'{Zˇ}', txt)  # Z-caron
+   txt = re.sub(r'&#381;|&Zcaron;|Ž', r'{Zˇ}', txt)  # Z-caron
-   txt = re.sub('&#382;|&zcaron;|ž', r'{zˇ}', txt)  # z-caron
+   txt = re.sub(r'&#382;|&zcaron;|ž', r'{zˇ}', txt)  # z-caron
-   txt = re.sub('&#8226;|&bull;|•', r'{*}', txt)  # bullet
+   txt = re.sub(r'&#8226;|&bull;|•', r'{*}', txt)  # bullet
-   txt = re.sub('&#8355;|₣', r'{Fr}', txt)  # Franc
+   txt = re.sub(r'&#8355;|₣', r'{Fr}', txt)  # Franc
-   txt = re.sub('&#8356;|₤', r'{L=}', txt)  # Lira
+   txt = re.sub(r'&#8356;|₤', r'{L=}', txt)  # Lira
-   txt = re.sub('&#8360;|₨', r'{Rs}', txt)  # Rupee
+   txt = re.sub(r'&#8360;|₨', r'{Rs}', txt)  # Rupee
-   txt = re.sub('&#8364;|&euro;|€', r'{C=}', txt)  # euro
+   txt = re.sub(r'&#8364;|&euro;|€', r'{C=}', txt)  # euro
-   txt = re.sub('&#8482;|&trade;|™', r'{tm}', txt)  # trademark
+   txt = re.sub(r'&#8482;|&trade;|™', r'{tm}', txt)  # trademark
-   txt = re.sub('&#9824;|&spades;|♠', r'{spade}', txt)  # spade
+   txt = re.sub(r'&#9824;|&spades;|♠', r'{spade}', txt)  # spade
-   txt = re.sub('&#9827;|&clubs;|♣', r'{club}', txt)  # club
+   txt = re.sub(r'&#9827;|&clubs;|♣', r'{club}', txt)  # club
-   txt = re.sub('&#9829;|&hearts;|♥', r'{heart}', txt)  # heart
+   txt = re.sub(r'&#9829;|&hearts;|♥', r'{heart}', txt)  # heart
-   txt = re.sub('&#9830;|&diams;|♦', r'{diamond}', txt)  # diamond
+   txt = re.sub(r'&#9830;|&diams;|♦', r'{diamond}', txt)  # diamond

    # Move into main code?
    # txt = re.sub('\xa0', r'p. ', txt)  # blank paragraph
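
All hundred-odd substitutions in this file share the same shape: the pattern is a plain alternation of a numeric entity, a named entity and the literal character, with no backslashes, while the replacement templates were already raw strings. One detail worth knowing when reading replacements like r'{c\}' (an illustrative check, not part of the commit):

    import re

    # In a replacement template, a backslash before a non-letter is kept
    # literally, so r'{c\}' really does insert '{c\}' into the output.
    print(re.sub(r'&#162;|&cent;|¢', r'{c\}', 'price: 5¢'))  # price: 5{c\}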

View File

@@ -51,9 +51,9 @@ class MarkdownMLizer(OEB2HTML):
    def tidy_up(self, text):
        # Remove blank space form beginning of paragraph.
-       text = re.sub('(?msu)^[ ]{1,3}', '', text)
+       text = re.sub(r'(?msu)^[ ]{1,3}', '', text)
        # pre has 4 spaces. We trimmed 3 so anything with a space left is a pre.
-       text = re.sub('(?msu)^[ ]', ' ', text)
+       text = re.sub(r'(?msu)^[ ]', ' ', text)
        # Remove tabs that aren't at the beginning of a line
        new_text = []

@@ -68,7 +68,7 @@ class MarkdownMLizer(OEB2HTML):
        text = '\n'.join(new_text)
        # Remove spaces from blank lines.
-       text = re.sub('(?msu)^[ ]+$', '', text)
+       text = re.sub(r'(?msu)^[ ]+$', '', text)
        # Reduce blank lines
        text = re.sub('(?msu)\n{7,}', '\n' * 6, text)

View File

@@ -34,7 +34,7 @@ def clean_txt(txt):
    txt = re.sub('(?m)(?<=^)([ ]{2,}|\t+)(?=.)', '&nbsp;' * 4, txt)
    # Condense redundant spaces
-   txt = re.sub('[ ]{2,}', ' ', txt)
+   txt = re.sub(r'[ ]{2,}', ' ', txt)
    # Remove blank space from the beginning and end of the document.
    txt = re.sub(r'^\s+(?=.)', '', txt)

@@ -213,7 +213,7 @@ def preserve_spaces(txt):
    '''
    Replaces spaces multiple spaces with &nbsp; entities.
    '''
-   txt = re.sub('(?P<space>[ ]{2,})', lambda mo: ' ' + ('&nbsp;' * (len(mo.group('space')) - 1)), txt)
+   txt = re.sub(r'(?P<space>[ ]{2,})', lambda mo: ' ' + ('&nbsp;' * (len(mo.group('space')) - 1)), txt)
    txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
    return txt

@@ -325,9 +325,9 @@ def detect_formatting_type(txt):
    # Check for markdown
    # Headings
-   markdown_count += len(re.findall('(?mu)^#+', txt))
-   markdown_count += len(re.findall('(?mu)^=+$', txt))
-   markdown_count += len(re.findall('(?mu)^-+$', txt))
+   markdown_count += len(re.findall(r'(?mu)^#+', txt))
+   markdown_count += len(re.findall(r'(?mu)^=+$', txt))
+   markdown_count += len(re.findall(r'(?mu)^-+$', txt))
    # Images
    markdown_count += len(re.findall(r'(?u)!\[.*?\](\[|\()', txt))
    # Links

View File

@@ -126,7 +126,7 @@ class TXTMLizer:
        text = re.sub('(?<=.)\n(?=.)', ' ', text)
        # Remove multiple spaces.
-       text = re.sub('[ ]{2,}', ' ', text)
+       text = re.sub(r'[ ]{2,}', ' ', text)
        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)

@@ -140,8 +140,8 @@ class TXTMLizer:
        # Replace spaces at the beginning and end of lines
        # We don't replace tabs because those are only added
        # when remove paragraph spacing is enabled.
-       text = re.sub('(?imu)^[ ]+', '', text)
-       text = re.sub('(?imu)[ ]+$', '', text)
+       text = re.sub(r'(?imu)^[ ]+', '', text)
+       text = re.sub(r'(?imu)[ ]+$', '', text)
        # Remove empty space and newlines at the beginning of the document.
        text = re.sub(r'(?u)^[ \n]+', '', text)

View File

@@ -406,7 +406,7 @@ class SearchDialog(QDialog):
        self.resize(self.sizeHint())

    def retrieve_template_search(self):
-       template, sep, query = re.split('#@#:([tdnb]):', self.current_search_text, flags=re.IGNORECASE)
+       template, sep, query = re.split(r'#@#:([tdnb]):', self.current_search_text, flags=re.IGNORECASE)
        self.template_value_box.setText(query)
        cb = self.template_test_type_box
        for idx in range(0, cb.count()):

View File

@@ -744,7 +744,7 @@ class CreateCustomColumn(QDialog):
            return self.simple_error('', _('The colors box must be empty or '
                'contain the same number of items as the value box'))
        for tc in c:
-           if tc not in QColor.colorNames() and not re.match('#(?:[0-9a-f]{3}){1,4}',tc,re.I):
+           if tc not in QColor.colorNames() and not re.match(r'#(?:[0-9a-f]{3}){1,4}',tc,re.I):
                return self.simple_error('', _('The color {0} is unknown').format(tc))
        display_dict = {'enum_values': l, 'enum_colors': c}
        if default_val:

View File

@@ -146,7 +146,7 @@ class EmailAccounts(QAbstractTableModel):  # {{{
            if aval:
                self.tags[account] = aval
        elif col == 1:
-           self.accounts[account][0] = re.sub(',+', ',', re.sub(r'\s+', ',', as_unicode(value or '').upper()))
+           self.accounts[account][0] = re.sub(r',+', ',', re.sub(r'\s+', ',', as_unicode(value or '').upper()))
        elif col == 0:
            na = as_unicode(value or '').strip()
            from email.utils import parseaddr
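
A quick trace of the nested substitution above (illustrative): runs of whitespace become commas first, which can create adjacent commas that the outer r',+' then collapses:

    import re

    value = 'epub  mobi,  azw3'
    print(re.sub(r',+', ',', re.sub(r'\s+', ',', value.upper())))  # EPUB,MOBI,AZW3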

View File

@@ -920,6 +920,6 @@ if __name__ == '__main__':  # {{{
    def callback(ed):
        import regex
-       ed.find_text(regex.compile('A bold word'))
+       ed.find_text(regex.compile(r'A bold word'))

    launch_editor(raw, path_is_raw=True, syntax='html', callback=callback)
# }}}

View File

@@ -3828,7 +3828,7 @@ class CatalogBuilder:
            # if self.opts.numbers_as_text and re.match('[0-9]+',word[0]):
                translated.append(NumberToText(word).text.capitalize())
            else:
-               if re.match('[0-9]+', word[0]):
+               if re.match(r'[0-9]+', word[0]):
                    word = word.replace(',', '')
                    suffix = re.search(r'[\D]', word)
                    if suffix:

@@ -3844,7 +3844,7 @@ class CatalogBuilder:
                    translated.append(capitalize(word))
            else:
-               if re.search('[0-9]+', word[0]):
+               if re.search(r'[0-9]+', word[0]):
                    word = word.replace(',', '')
                    suffix = re.search(r'[\D]', word)
                    if suffix:

@@ -4114,7 +4114,7 @@ class CatalogBuilder:
        Return:
            (str): char if A-z, else SYMBOLS
        '''
-       if not re.search('[a-zA-Z]', ascii_text(char)):
+       if not re.search(r'[a-zA-Z]', ascii_text(char)):
            return self.SYMBOLS
        else:
            return char

View File

@@ -87,7 +87,7 @@ class NumberToText:  # {{{
            self.log('numberTranslate(): %s' % self.number)

        # Special case ordinals
-       if re.search('[st|nd|rd|th]',self.number):
+       if re.search(r'[st|nd|rd|th]',self.number):
            self.number = self.number.replace(',', '')
            ordinal_suffix = re.search(r'[\D]', self.number)
            ordinal_number = re.sub(r'\D','',self.number.replace(',', ''))

@@ -134,7 +134,7 @@ class NumberToText:  # {{{
                self.log('Hyphenated: %s' % self.number)
            self.number_as_float = self.number.split('-')[0]
            strings = self.number.split('-')
-           if re.search('[0-9]+', strings[0]):
+           if re.search(r'[0-9]+', strings[0]):
                left = NumberToText(strings[0]).text
                right = strings[1]
            else:

@@ -143,7 +143,7 @@ class NumberToText:  # {{{
            self.text = f'{left}-{right}'

        # Test for only commas and numbers
-       elif ',' in self.number and not re.search('[^0-9,]',self.number):
+       elif ',' in self.number and not re.search(r'[^0-9,]',self.number):
            if self.verbose:
                self.log('Comma(s): %s' % self.number)
            self.number_as_float = self.number.replace(',', '')

View File

@@ -1504,11 +1504,11 @@ def text_to_tokens(text):
        text = match.group(1)
        OR = True
    tokens = []
-   quot = re.search('"(.*?)"', text)
+   quot = re.search(r'"(.*?)"', text)
    while quot:
        tokens.append(quot.group(1))
        text = text.replace('"'+quot.group(1)+'"', '')
-       quot = re.search('"(.*?)"', text)
+       quot = re.search(r'"(.*?)"', text)
    tokens += text.split(' ')
    ans = []
    for i in tokens:
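
A short trace of the quote-extraction loop (illustrative): quoted phrases are pulled out first, then the remainder is split on spaces:

    import re

    text = '"dark fantasy" vampire'
    quot = re.search(r'"(.*?)"', text)
    print(quot.group(1))                                       # dark fantasy
    print(text.replace('"'+quot.group(1)+'"', '').split(' '))  # ['', 'vampire']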

View File

@@ -2556,7 +2556,7 @@ class BibTeX:
        self.invalid_cit = re.compile('[ "@\',\\#}{~%&$^]')
        self.upper = re.compile('[' +
            string.ascii_uppercase + ']')
-       self.escape = re.compile('[#&%_]')
+       self.escape = re.compile(r'[#&%_]')

    def ValidateCitationKey(self, text):
        '''
View File

@@ -59,7 +59,7 @@ def get_opts_from_parser(parser, prefix):

def send(ans):
-   pat = re.compile('([^0-9a-zA-Z_./-])')
+   pat = re.compile(r'([^0-9a-zA-Z_./-])')
    for x in sorted(set(ans)):
        x = pat.sub(lambda m : '\\'+m.group(1), x)
        if x.endswith('\\ '):

View File

@@ -384,7 +384,7 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
    repl_func = partial(fd_repl_func, dt, 'ap' in format.lower())
    return re.sub(
-       '(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
+       r'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
        repl_func, format)
# }}}

@@ -460,7 +460,7 @@ def clean_date_for_sort(dt, fmt=None):
        'min':UNDEFINED_DATE.minute, 'sec':UNDEFINED_DATE.second}
    repl_func = partial(cd_repl_func, tt, dt)
-   re.sub('(s{1,2})|(m{1,2})|(h{1,2})|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, fmt)
+   re.sub(r'(s{1,2})|(m{1,2})|(h{1,2})|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, fmt)
    return dt.replace(year=tt['year'], month=tt['mon'], day=tt['day'], hour=tt['hour'],
        minute=tt['min'], second=tt['sec'], microsecond=0)
# }}}
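
For reference, the tokens this pattern hands to the replacement callback for a typical format string (illustrative):

    import re

    fmt_pat = r'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))'
    print([m.group() for m in re.finditer(fmt_pat, 'dd MM yyyy hh:mm ap')])
    # ['dd', 'MM', 'yyyy', 'hh', 'mm', 'ap']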

View File

@@ -90,7 +90,7 @@ def get_system_locale():

def sanitize_lang(lang):
    if lang:
-       match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
+       match = re.match(r'[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
        if match:
            lang = match.group()
            if lang == 'zh':

View File

@@ -195,7 +195,7 @@ class Parser:
    def tokenize(self, expr):
        # convert docstrings to base64 to avoid all processing. Change the docstring
        # indicator to something unique with no characters special to the parser.
-       expr = re.sub('(""")(..*?)(""")',
+       expr = re.sub(r'(""")(..*?)(""")',
            lambda mo: self.docstring_sep + as_hex_unicode(mo.group(2)) + self.docstring_sep,
            expr, flags=re.DOTALL)

View File

@@ -1730,7 +1730,7 @@ class BasicNewsRecipe(Recipe):
    def error_in_article_download(self, request, traceback):
        self.jobs_done += 1
-       if traceback and re.search('^AbortArticle:', traceback, flags=re.M) is not None:
+       if traceback and re.search(r'^AbortArticle:', traceback, flags=re.M) is not None:
            self.log.warn('Aborted download of article:', request.article.title,
                'from', request.article.url)
        self.report_progress(float(self.jobs_done)/len(self.jobs),

View File

@@ -59,7 +59,7 @@ def styleFromList(styleName, specArray, spacing, showAllLevels):
    numbered = False
    displayLevels = 0
    listStyle = ListStyle(name=styleName)
-   numFormatPattern = re.compile('([1IiAa])')
+   numFormatPattern = re.compile(r'([1IiAa])')
    cssLengthPattern = re.compile('([^a-z]+)\\s*([a-z]+)?')
    m = cssLengthPattern.search(spacing)
    if (m is not None):