mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix syntax errors manually
This commit is contained in:
parent
655ab21b0b
commit
999175cf55
@ -237,7 +237,7 @@ def render_options(cmd, groups, options_header=True, add_program=True, header_le
|
||||
|
||||
|
||||
def mark_options(raw):
|
||||
raw = re.sub(r'(\s+)--(\s+)', ur'\1``--``\2', raw)
|
||||
raw = re.sub(r'(\s+)--(\s+)', u'\\1``--``\\2', raw)
|
||||
|
||||
def sub(m):
|
||||
opt = m.group()
|
||||
|
@ -18,9 +18,9 @@ class LaTeXHelpBuilder(LaTeXBuilder):
|
||||
LaTeXBuilder.finish(self)
|
||||
self.info('Fixing Cyrillic characters...')
|
||||
tex = os.path.join(self.outdir, 'calibre.tex')
|
||||
with open(tex, 'r+b') as f:
|
||||
with open(tex, 'r+') as f:
|
||||
raw = f.read()
|
||||
for x in (b'Михаил Горбачёв', b'Фёдор Миха́йлович Достоевский'):
|
||||
raw = raw.replace(x, br'{\fontencoding{T2A}\selectfont %s}' % (x.replace(b'а́', b'a')))
|
||||
for x in (u'Михаил Горбачёв', u'Фёдор Миха́йлович Достоевский'):
|
||||
raw = raw.replace(x, u'{\\fontencoding{T2A}\\selectfont %s}' % (x.replace(u'а́', u'a')))
|
||||
f.seek(0)
|
||||
f.write(raw)
|
||||
|
@ -64,23 +64,23 @@ class TXT2TXTZ(FileTypePlugin):
|
||||
images = []
|
||||
|
||||
# Textile
|
||||
for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
|
||||
for m in re.finditer(unicode(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
|
||||
path = m.group('path')
|
||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||
images.append(path)
|
||||
|
||||
# Markdown inline
|
||||
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt): # noqa
|
||||
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
|
||||
path = m.group('path')
|
||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||
images.append(path)
|
||||
|
||||
# Markdown reference
|
||||
refs = {}
|
||||
for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
|
||||
for m in re.finditer(unicode(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
|
||||
if m.group('id') and m.group('path'):
|
||||
refs[m.group('id')] = m.group('path')
|
||||
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt): # noqa
|
||||
for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
|
||||
path = refs.get(m.group('id'), None)
|
||||
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
|
||||
images.append(path)
|
||||
|
@ -315,7 +315,7 @@ class CSSPreProcessor(object):
|
||||
# are commented lines before the first @import or @charset rule. Since
|
||||
# the conversion will remove all stylesheets anyway, we don't lose
|
||||
# anything
|
||||
data = re.sub(ur'/\*.*?\*/', u'', data, flags=re.DOTALL)
|
||||
data = re.sub(unicode(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
|
||||
|
||||
ans, namespaced = [], False
|
||||
for line in data.splitlines():
|
||||
@ -533,7 +533,7 @@ class HTMLPreProcessor(object):
|
||||
start_rules = []
|
||||
if is_pdftohtml:
|
||||
# Remove non breaking spaces
|
||||
start_rules.append((re.compile(ur'\u00a0'), lambda match : ' '))
|
||||
start_rules.append((re.compile(unicode(r'\u00a0')), lambda match : ' '))
|
||||
|
||||
if not getattr(self.extra_opts, 'keep_ligatures', False):
|
||||
html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
|
||||
|
@ -157,17 +157,17 @@ class HeuristicProcessor(object):
|
||||
]
|
||||
|
||||
ITALICIZE_STYLE_PATS = [
|
||||
ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/',
|
||||
ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*><]+)/'),
|
||||
unicode(r'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'),
|
||||
]
|
||||
|
||||
for word in ITALICIZE_WORDS:
|
||||
@ -419,7 +419,7 @@ class HeuristicProcessor(object):
|
||||
return html
|
||||
|
||||
def fix_nbsp_indents(self, html):
|
||||
txtindent = re.compile(ur'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE)
|
||||
txtindent = re.compile(unicode(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
|
||||
html = txtindent.sub(self.insert_indent, html)
|
||||
if self.found_indents > 1:
|
||||
self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
|
||||
@ -427,10 +427,10 @@ class HeuristicProcessor(object):
|
||||
|
||||
def cleanup_markup(self, html):
|
||||
# remove remaining non-breaking spaces
|
||||
html = re.sub(ur'\u00a0', ' ', html)
|
||||
html = re.sub(unicode(r'\u00a0'), ' ', html)
|
||||
# Get rid of various common microsoft specific tags which can cause issues later
|
||||
# Get rid of empty <o:p> tags to simplify other processing
|
||||
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
|
||||
html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
|
||||
# Delete microsoft 'smart' tags
|
||||
html = re.sub('(?i)</?st1:\w+>', '', html)
|
||||
# Re-open self closing paragraph tags
|
||||
|
@ -108,7 +108,7 @@ class HTMLConverter(object):
|
||||
re.IGNORECASE), lambda m: '<br />'),
|
||||
|
||||
# Replace entities
|
||||
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
||||
(re.compile(u'&(\\S+?);'), partial(entity_to_unicode,
|
||||
exceptions=['lt', 'gt', 'amp', 'quot'])),
|
||||
# Remove comments from within style tags as they can mess up BeatifulSoup
|
||||
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
|
||||
|
@ -233,7 +233,7 @@ class OverDrive(Source):
|
||||
xreq.add_header('Referer', q_init_search)
|
||||
xreq.add_header('Accept', 'application/json, text/javascript, */*')
|
||||
raw = br.open_novisit(xreq).read()
|
||||
for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
|
||||
for m in re.finditer(unicode(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
|
||||
if int(m.group('totalrecords')) == 0:
|
||||
return ''
|
||||
elif int(m.group('displayrecords')) >= 1:
|
||||
|
@ -358,7 +358,7 @@ class MobiReader(object):
|
||||
self.processed_html = re.sub(
|
||||
r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
|
||||
bods = htmls = 0
|
||||
for x in re.finditer(ur'</body>|</html>', self.processed_html):
|
||||
for x in re.finditer(u'</body>|</html>', self.processed_html):
|
||||
if x == '</body>':
|
||||
bods +=1
|
||||
else:
|
||||
|
@ -163,7 +163,7 @@ def resolve_styles(container, name, select=None, sheet_callback=None):
|
||||
style_map = defaultdict(list)
|
||||
pseudo_style_map = defaultdict(list)
|
||||
rule_index_counter = count()
|
||||
pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
|
||||
pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
|
||||
|
||||
def process_sheet(sheet, sheet_name):
|
||||
if sheet_callback is not None:
|
||||
|
@ -94,7 +94,7 @@ class Structure(BaseTest):
|
||||
self.assertEqual(3, c.opf_version_parsed.major)
|
||||
self.assertTrue(len(get_toc(c))) # detect NCX toc even in epub 3 files
|
||||
c.add_file('nav.html', b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
|
||||
'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
|
||||
b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
|
||||
process_manifest_item=lambda item:item.set('properties', 'nav'))
|
||||
toc = get_toc(c)
|
||||
self.assertTrue(len(toc))
|
||||
@ -132,9 +132,9 @@ class Structure(BaseTest):
|
||||
c = self.create_epub([cmi('xxx.html'), cmi('a.html')], ver=3)
|
||||
self.assertEqual(3, c.opf_version_parsed.major)
|
||||
c.add_file('xxx/nav.html', b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
|
||||
'<body><nav epub:type="landmarks"><ol><li><a epub:type="x" href="../xxx.html#moo">XXX </a></li>'
|
||||
'<li><a href="../a.html"> YYY </a></li>'
|
||||
'</ol></nav></body></html>',
|
||||
b'<body><nav epub:type="landmarks"><ol><li><a epub:type="x" href="../xxx.html#moo">XXX </a></li>'
|
||||
b'<li><a href="../a.html"> YYY </a></li>'
|
||||
b'</ol></nav></body></html>',
|
||||
process_manifest_item=lambda item:item.set('properties', 'nav'))
|
||||
self.assertEqual([
|
||||
{'dest':'xxx.html', 'frag':'moo', 'type':'x', 'title':'XXX'}, {'dest':'a.html', 'frag':'', 'type':'', 'title':'YYY'}
|
||||
|
@ -217,7 +217,7 @@ class Stylizer(object):
|
||||
rules.sort()
|
||||
self.rules = rules
|
||||
self._styles = {}
|
||||
pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
|
||||
pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
|
||||
select = Select(tree, ignore_inappropriate_pseudo_classes=True)
|
||||
|
||||
for _, _, cssdict, text, _ in rules:
|
||||
|
@ -293,7 +293,7 @@ class FlowSplitter(object):
|
||||
body = self.get_body(root)
|
||||
if body is None:
|
||||
return False
|
||||
txt = re.sub(ur'\s+|\xa0', '',
|
||||
txt = re.sub(u'\\s+|\\xa0', '',
|
||||
etree.tostring(body, method='text', encoding=unicode))
|
||||
if len(txt) > 1:
|
||||
return False
|
||||
|
@ -278,7 +278,7 @@ class PDFStream(object):
|
||||
self.stream = HashingStream(stream)
|
||||
self.compress = compress
|
||||
self.write_line(PDFVER)
|
||||
self.write_line(b'%íì¦"')
|
||||
self.write_line(u'%íì¦"'.encode())
|
||||
creator = ('%s %s [https://calibre-ebook.com]'%(__appname__,
|
||||
__version__))
|
||||
self.write_line('%% Created by %s'%creator)
|
||||
|
@ -174,7 +174,7 @@ class PMLMLizer(object):
|
||||
|
||||
def prepare_text(self, text):
|
||||
# Replace empty paragraphs with \c pml codes used to denote emtpy lines.
|
||||
text = re.sub(ur'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>', '\\c\n\\c', text)
|
||||
text = re.sub(unicode(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), '\\c\n\\c', text)
|
||||
return text
|
||||
|
||||
def clean_text(self, text):
|
||||
@ -188,7 +188,7 @@ class PMLMLizer(object):
|
||||
text = text.replace('\\Q="%s"' % unused, '')
|
||||
|
||||
# Remove \Cn tags that are within \x and \Xn tags
|
||||
text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text)
|
||||
text = re.sub(unicode(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\g<t>\g<a>\g<b>\g<t>', text)
|
||||
|
||||
# Replace bad characters.
|
||||
text = text.replace(u'\xc2', '')
|
||||
|
@ -119,7 +119,7 @@ class RTFMLizer(object):
|
||||
self.log.debug('Converting %s to RTF markup...' % item.href)
|
||||
# Removing comments is needed as comments with -- inside them can
|
||||
# cause fromstring() to fail
|
||||
content = re.sub(ur'<!--.*?-->', u'', etree.tostring(item.data, encoding=unicode), flags=re.DOTALL)
|
||||
content = re.sub(u'<!--.*?-->', u'', etree.tostring(item.data, encoding=unicode), flags=re.DOTALL)
|
||||
content = self.remove_newlines(content)
|
||||
content = self.remove_tabs(content)
|
||||
content = etree.fromstring(content)
|
||||
|
@ -16,7 +16,7 @@ from calibre.ptempfile import better_mktemp
|
||||
|
||||
|
||||
class FieldsLarge:
|
||||
"""
|
||||
r"""
|
||||
=========================
|
||||
Logic
|
||||
=========================
|
||||
|
@ -64,7 +64,7 @@ class Tokenize:
|
||||
self.__reini_utf8_counters()
|
||||
return token
|
||||
# add a uc control
|
||||
elif token[:3] == '\uc':
|
||||
elif token[:3] == r'\uc':
|
||||
self.__uc_value[-1] = int(token[3:])
|
||||
self.__reini_utf8_counters()
|
||||
return token
|
||||
|
@ -23,10 +23,7 @@ from calibre.utils.date import UNDEFINED_DATE
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.file_type_icons import EXT_MAP
|
||||
|
||||
try:
|
||||
NO_URL_FORMATTING = QUrl.None_
|
||||
except AttributeError:
|
||||
NO_URL_FORMATTING = QUrl.None
|
||||
NO_URL_FORMATTING = QUrl.None_
|
||||
|
||||
# Setup gprefs {{{
|
||||
gprefs = JSONConfig('gui')
|
||||
|
@ -57,7 +57,7 @@ def css():
|
||||
val = P('templates/book_details.css', data=True).decode('utf-8')
|
||||
col = QApplication.instance().palette().color(QPalette.Link).name()
|
||||
val = val.replace('LINK_COLOR', col)
|
||||
_css = re.sub(ur'/\*.*?\*/', '', val, flags=re.DOTALL)
|
||||
_css = re.sub(r'/\*.*?\*/', '', val, flags=re.DOTALL)
|
||||
return _css
|
||||
|
||||
|
||||
|
@ -1151,7 +1151,7 @@ class BooksModel(QAbstractTableModel): # {{{
|
||||
return False
|
||||
val = (int(value) if column == 'rating' else
|
||||
value if column in ('timestamp', 'pubdate')
|
||||
else re.sub(ur'\s', u' ', unicode(value or '').strip()))
|
||||
else re.sub(u'\\s', u' ', unicode(value or '').strip()))
|
||||
id = self.db.id(row)
|
||||
books_to_refresh = set([id])
|
||||
if column == 'rating':
|
||||
|
@ -175,7 +175,7 @@ class Stores(OrderedDict):
|
||||
def load_object(self, src, key):
|
||||
namespace = {}
|
||||
builtin = self[key]
|
||||
exec src in namespace
|
||||
exec(src, namespace)
|
||||
ver = namespace['store_version']
|
||||
cls = None
|
||||
for x in namespace.itervalues():
|
||||
|
@ -56,7 +56,7 @@ def get_newest_version():
|
||||
except UnicodeDecodeError:
|
||||
version = u''
|
||||
ans = NO_CALIBRE_UPDATE
|
||||
m = re.match(ur'(\d+)\.(\d+).(\d+)$', version)
|
||||
m = re.match(unicode(r'(\d+)\.(\d+).(\d+)$'), version)
|
||||
if m is not None:
|
||||
ans = tuple(map(int, (m.group(1), m.group(2), m.group(3))))
|
||||
return ans
|
||||
|
@ -373,14 +373,14 @@ class ZshCompleter(object): # {{{
|
||||
opt_lines.append(ostrings + help_txt + ' \\')
|
||||
opt_lines = ('\n' + (' ' * 8)).join(opt_lines)
|
||||
|
||||
f.write((ur'''
|
||||
f.write((u'''
|
||||
_ebook_edit() {
|
||||
local curcontext="$curcontext" state line ebookfile expl
|
||||
typeset -A opt_args
|
||||
|
||||
_arguments -C -s \
|
||||
_arguments -C -s \\
|
||||
%s
|
||||
"1:ebook file:_files -g '(#i)*.(%s)'" \
|
||||
"1:ebook file:_files -g '(#i)*.(%s)'" \\
|
||||
'*:file in ebook:->files' && return 0
|
||||
|
||||
case $state in
|
||||
@ -393,7 +393,7 @@ _ebook_edit() {
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
_wanted files expl 'file from ebook' \
|
||||
_wanted files expl 'file from ebook' \\
|
||||
_multi_parts / _zip_cache_list && return 0
|
||||
;;
|
||||
esac
|
||||
|
@ -39,7 +39,7 @@ class TestHTTP(BaseTest):
|
||||
'\r\n', a='one', b='two 2 3', c='three')
|
||||
|
||||
test('Non-ascii headers parsing',
|
||||
b'a:mūs\r', '\r\n', a='mūs')
|
||||
'a:mūs\r', '\r\n', a='mūs')
|
||||
|
||||
test('Comma-separated parsing',
|
||||
'Accept-Encoding: one',
|
||||
|
@ -380,7 +380,7 @@ def create_global_prefs(conf_obj=None):
|
||||
c.add_opt('database_path',
|
||||
default=os.path.expanduser('~/library1.db'),
|
||||
help=_('Path to the database in which books are stored'))
|
||||
c.add_opt('filename_pattern', default=ur'(?P<title>.+) - (?P<author>[^_]+)',
|
||||
c.add_opt('filename_pattern', default=u'(?P<title>.+) - (?P<author>[^_]+)',
|
||||
help=_('Pattern to guess metadata from filenames'))
|
||||
c.add_opt('isbndb_com_key', default='',
|
||||
help=_('Access key for isbndb.com'))
|
||||
|
@ -143,15 +143,15 @@ class Parser(object):
|
||||
WORD = 2
|
||||
QUOTED_WORD = 3
|
||||
EOF = 4
|
||||
REPLACEMENTS = tuple((u'\\' + x, unichr(i + 1)) for i, x in enumerate(ur'\"()'))
|
||||
REPLACEMENTS = tuple((u'\\' + x, unichr(i + 1)) for i, x in enumerate(u'\\"()'))
|
||||
|
||||
# Had to translate named constants to numeric values
|
||||
lex_scanner = re.Scanner([
|
||||
(ur'[()]', lambda x,t: (Parser.OPCODE, t)),
|
||||
(ur'@.+?:[^")\s]+', lambda x,t: (Parser.WORD, unicode(t))),
|
||||
(ur'[^"()\s]+', lambda x,t: (Parser.WORD, unicode(t))),
|
||||
(ur'".*?((?<!\\)")', lambda x,t: (Parser.QUOTED_WORD, t[1:-1])),
|
||||
(ur'\s+', None)
|
||||
(unicode(r'[()]'), lambda x,t: (Parser.OPCODE, t)),
|
||||
(unicode(r'@.+?:[^")\s]+'), lambda x,t: (Parser.WORD, unicode(t))),
|
||||
(unicode(r'[^"()\s]+'), lambda x,t: (Parser.WORD, unicode(t))),
|
||||
(unicode(r'".*?((?<!\\)")'), lambda x,t: (Parser.QUOTED_WORD, t[1:-1])),
|
||||
(unicode(r'\s+'), None)
|
||||
], flags=re.DOTALL)
|
||||
|
||||
def token(self, advance=False):
|
||||
|
@ -21,7 +21,7 @@ PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
|
||||
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
|
||||
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
|
||||
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
|
||||
CAPFIRST = re.compile(ur"^[%s]*?(\w)" % PUNCT, flags=re.UNICODE)
|
||||
CAPFIRST = re.compile(unicode(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
|
||||
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U)
|
||||
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
|
||||
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
|
||||
|
@ -1319,7 +1319,7 @@ class _FeedParserMixin:
|
||||
author, email = context.get(key), None
|
||||
if not author:
|
||||
return
|
||||
emailmatch = re.search(ur'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author)
|
||||
emailmatch = re.search(unicode(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?'''), author)
|
||||
if emailmatch:
|
||||
email = emailmatch.group(0)
|
||||
# probably a better way to do the following, but it passes all the tests
|
||||
|
@ -1681,7 +1681,7 @@ class BasicNewsRecipe(Recipe):
|
||||
|
||||
@classmethod
|
||||
def soup(cls, raw):
|
||||
entity_replace = [(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
||||
entity_replace = [(re.compile(u'&(\\S+?);'), partial(entity_to_unicode,
|
||||
exceptions=[]))]
|
||||
nmassage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||
nmassage.extend(entity_replace)
|
||||
|
Loading…
x
Reference in New Issue
Block a user