mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-22 06:50:47 -04:00
469 lines
23 KiB
Plaintext
469 lines
23 KiB
Plaintext
# vim:fileencoding=utf-8
|
||
# License: BSD
|
||
# Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||
# Copyright: 2013, Alexander Tsepkov
|
||
|
||
# globals: ρσ_iterator_symbol, ρσ_list_decorate
|
||
|
||
# basic implementation of Python's 're' library
|
||
|
||
|
||
# Alias DB from http://www.unicode.org/Public/8.0.0/ucd/NameAliases.txt {{{
|
||
_ALIAS_MAP = {"null":0,"nul":0,"start of heading":1,"soh":1,"start of text":2,"stx":2,"end of text":3,"etx":3,"end of transmission":4,"eot":4,"enquiry":5,"enq":5,"acknowledge":6,"ack":6,"alert":7,"bel":7,"backspace":8,"bs":8,"character tabulation":9,"horizontal tabulation":9,"ht":9,"tab":9,"line feed":10,"new line":10,"end of line":10,"lf":10,"nl":10,"eol":10,"line tabulation":11,"vertical tabulation":11,"vt":11,"form feed":12,"ff":12,"carriage return":13,"cr":13,"shift out":14,"locking-shift one":14,"so":14,"shift in":15,"locking-shift zero":15,"si":15,"data link escape":16,"dle":16,"device control one":17,"dc1":17,"device control two":18,"dc2":18,"device control three":19,"dc3":19,"device control four":20,"dc4":20,"negative acknowledge":21,"nak":21,"synchronous idle":22,"syn":22,"end of transmission block":23,"etb":23,"cancel":24,"can":24,"end of medium":25,"eom":25,"substitute":26,"sub":26,"escape":27,"esc":27,"information separator four":28,"file separator":28,"fs":28,"information separator three":29,"group separator":29,"gs":29,"information separator two":30,"record separator":30,"rs":30,"information separator one":31,"unit separator":31,"us":31,"sp":32,"delete":127,"del":127,"padding character":128,"pad":128,"high octet preset":129,"hop":129,"break permitted here":130,"bph":130,"no break here":131,"nbh":131,"index":132,"ind":132,"next line":133,"nel":133,"start of selected area":134,"ssa":134,"end of selected area":135,"esa":135,"character tabulation set":136,"horizontal tabulation set":136,"hts":136,"character tabulation with justification":137,"horizontal tabulation with justification":137,"htj":137,"line tabulation set":138,"vertical tabulation set":138,"vts":138,"partial line forward":139,"partial line down":139,"pld":139,"partial line backward":140,"partial line up":140,"plu":140,"reverse line feed":141,"reverse index":141,"ri":141,"single shift two":142,"single-shift-2":142,"ss2":142,"single shift three":143,"single-shift-3":143,"ss3":143,"device 
control string":144,"dcs":144,"private use one":145,"private use-1":145,"pu1":145,"private use two":146,"private use-2":146,"pu2":146,"set transmit state":147,"sts":147,"cancel character":148,"cch":148,"message waiting":149,"mw":149,"start of guarded area":150,"start of protected area":150,"spa":150,"end of guarded area":151,"end of protected area":151,"epa":151,"start of string":152,"sos":152,"single graphic character introducer":153,"sgc":153,"single character introducer":154,"sci":154,"control sequence introducer":155,"csi":155,"string terminator":156,"st":156,"operating system command":157,"osc":157,"privacy message":158,"pm":158,"application program command":159,"apc":159,"nbsp":160,"shy":173,"latin capital letter gha":418,"latin small letter gha":419,"cgj":847,"alm":1564,"syriac sublinear colon skewed left":1801,"kannada letter llla":3294,"lao letter fo fon":3741,"lao letter fo fay":3743,"lao letter ro":3747,"lao letter lo":3749,"tibetan mark bka- shog gi mgo rgyan":4048,"fvs1":6155,"fvs2":6156,"fvs3":6157,"mvs":6158,"zwsp":8203,"zwnj":8204,"zwj":8205,"lrm":8206,"rlm":8207,"lre":8234,"rle":8235,"pdf":8236,"lro":8237,"rlo":8238,"nnbsp":8239,"mmsp":8287,"wj":8288,"lri":8294,"rli":8295,"fsi":8296,"pdi":8297,"weierstrass elliptic function":8472,"micr on us symbol":9288,"micr dash symbol":9289,"leftwards triangle-headed arrow with double vertical stroke":11130,"rightwards triangle-headed arrow with double vertical stroke":11132,"yi syllable iteration mark":40981,"presentation form for vertical right white lenticular bracket":65048,"vs1":65024,"vs2":65025,"vs3":65026,"vs4":65027,"vs5":65028,"vs6":65029,"vs7":65030,"vs8":65031,"vs9":65032,"vs10":65033,"vs11":65034,"vs12":65035,"vs13":65036,"vs14":65037,"vs15":65038,"vs16":65039,"byte order mark":65279,"bom":65279,"zwnbsp":65279,"cuneiform sign nu11 tenu":74452,"cuneiform sign nu11 over nu11 bur over bur":74453,"byzantine musical symbol fthora skliron chroma 
vasis":118981,"vs17":917760,"vs18":917761,"vs19":917762,"vs20":917763,"vs21":917764,"vs22":917765,"vs23":917766,"vs24":917767,"vs25":917768,"vs26":917769,"vs27":917770,"vs28":917771,"vs29":917772,"vs30":917773,"vs31":917774,"vs32":917775,"vs33":917776,"vs34":917777,"vs35":917778,"vs36":917779,"vs37":917780,"vs38":917781,"vs39":917782,"vs40":917783,"vs41":917784,"vs42":917785,"vs43":917786,"vs44":917787,"vs45":917788,"vs46":917789,"vs47":917790,"vs48":917791,"vs49":917792,"vs50":917793,"vs51":917794,"vs52":917795,"vs53":917796,"vs54":917797,"vs55":917798,"vs56":917799,"vs57":917800,"vs58":917801,"vs59":917802,"vs60":917803,"vs61":917804,"vs62":917805,"vs63":917806,"vs64":917807,"vs65":917808,"vs66":917809,"vs67":917810,"vs68":917811,"vs69":917812,"vs70":917813,"vs71":917814,"vs72":917815,"vs73":917816,"vs74":917817,"vs75":917818,"vs76":917819,"vs77":917820,"vs78":917821,"vs79":917822,"vs80":917823,"vs81":917824,"vs82":917825,"vs83":917826,"vs84":917827,"vs85":917828,"vs86":917829,"vs87":917830,"vs88":917831,"vs89":917832,"vs90":917833,"vs91":917834,"vs92":917835,"vs93":917836,"vs94":917837,"vs95":917838,"vs96":917839,"vs97":917840,"vs98":917841,"vs99":917842,"vs100":917843,"vs101":917844,"vs102":917845,"vs103":917846,"vs104":917847,"vs105":917848,"vs106":917849,"vs107":917850,"vs108":917851,"vs109":917852,"vs110":917853,"vs111":917854,"vs112":917855,"vs113":917856,"vs114":917857,"vs115":917858,"vs116":917859,"vs117":917860,"vs118":917861,"vs119":917862,"vs120":917863,"vs121":917864,"vs122":917865,"vs123":917866,"vs124":917867,"vs125":917868,"vs126":917869,"vs127":917870,"vs128":917871,"vs129":917872,"vs130":917873,"vs131":917874,"vs132":917875,"vs133":917876,"vs134":917877,"vs135":917878,"vs136":917879,"vs137":917880,"vs138":917881,"vs139":917882,"vs140":917883,"vs141":917884,"vs142":917885,"vs143":917886,"vs144":917887,"vs145":917888,"vs146":917889,"vs147":917890,"vs148":917891,"vs149":917892,"vs150":917893,"vs151":917894,"vs152":917895,"vs153":917896,"vs154":917897
,"vs155":917898,"vs156":917899,"vs157":917900,"vs158":917901,"vs159":917902,"vs160":917903,"vs161":917904,"vs162":917905,"vs163":917906,"vs164":917907,"vs165":917908,"vs166":917909,"vs167":917910,"vs168":917911,"vs169":917912,"vs170":917913,"vs171":917914,"vs172":917915,"vs173":917916,"vs174":917917,"vs175":917918,"vs176":917919,"vs177":917920,"vs178":917921,"vs179":917922,"vs180":917923,"vs181":917924,"vs182":917925,"vs183":917926,"vs184":917927,"vs185":917928,"vs186":917929,"vs187":917930,"vs188":917931,"vs189":917932,"vs190":917933,"vs191":917934,"vs192":917935,"vs193":917936,"vs194":917937,"vs195":917938,"vs196":917939,"vs197":917940,"vs198":917941,"vs199":917942,"vs200":917943,"vs201":917944,"vs202":917945,"vs203":917946,"vs204":917947,"vs205":917948,"vs206":917949,"vs207":917950,"vs208":917951,"vs209":917952,"vs210":917953,"vs211":917954,"vs212":917955,"vs213":917956,"vs214":917957,"vs215":917958,"vs216":917959,"vs217":917960,"vs218":917961,"vs219":917962,"vs220":917963,"vs221":917964,"vs222":917965,"vs223":917966,"vs224":917967,"vs225":917968,"vs226":917969,"vs227":917970,"vs228":917971,"vs229":917972,"vs230":917973,"vs231":917974,"vs232":917975,"vs233":917976,"vs234":917977,"vs235":917978,"vs236":917979,"vs237":917980,"vs238":917981,"vs239":917982,"vs240":917983,"vs241":917984,"vs242":917985,"vs243":917986,"vs244":917987,"vs245":917988,"vs246":917989,"vs247":917990,"vs248":917991,"vs249":917992,"vs250":917993,"vs251":917994,"vs252":917995,"vs253":917996,"vs254":917997,"vs255":917998,"vs256":917999}
|
||
# }}}
|
||
|
||
# Single-letter escapes recognised in replacement templates, mapped to the
# character code they expand to
_ASCII_CONTROL_CHARS = {
    'a': 7,
    'b': 8,
    'f': 12,
    'n': 10,
    'r': 13,
    't': 9,
    'v': 11
}

# Helper patterns used while parsing replacement templates
_HEX_PAT = /^[a-fA-F0-9]/
_NUM_PAT = /^[0-9]/
_GROUP_PAT = /<([^>]+)>/
_NAME_PAT = /^[a-zA-Z ]/

# Flag bits, each available under a long and a short name
IGNORECASE = 2
I = IGNORECASE
LOCALE = 4
L = LOCALE
MULTILINE = 8
M = MULTILINE
DOTALL = 16
D = DOTALL
UNICODE = 32
U = UNICODE
VERBOSE = 64
X = VERBOSE
DEBUG = 128
ASCII = 256
A = ASCII

# True when the JavaScript engine exposes RegExp.prototype.unicode
supports_unicode = RegExp.prototype.unicode is not undefined

# Characters that escape() prefixes with a backslash
_RE_ESCAPE = /[-\/\\^$*+?.()|[\]{}]/g

# Cache of compiled patterns: key -> RegexObject, plus the keys in insertion order
_re_cache_map = {}
_re_cache_items = v'[]'

# This is the error JS throws for invalid regexps
error = SyntaxError
|
||
|
||
def _expand(groups, repl, group_name_map):
    # Expand the replacement template repl for a single match.
    #
    # groups is the array returned by RegExp.exec() (index 0 is the whole
    # match), repl is the template string and group_name_map maps a group
    # name to the list of group numbers defined with that name. Returns the
    # expanded string. Escape sequences that are not understood are copied to
    # the output unchanged.
    i = 0  # index of the next unread character of repl

    def next():
        # Return the character at the cursor and advance the cursor
        nonlocal i
        return v'repl[i++]'

    def peek():
        # Return the character at the cursor without advancing
        # (undefined once the end of repl has been reached)
        return repl[i]

    def read_digits(count, pat, base, maxval, prefix):
        # Read up to count characters matching pat and parse them in the
        # given base. Returns a number on success. Returns the raw text read
        # so far when a non-greedy read runs out of digits or when the value
        # exceeds maxval. A count of Number.MAX_VALUE means "read as many
        # digits as there are".
        ans = prefix or ''
        greedy = count is Number.MAX_VALUE
        while count > 0:
            count -= 1
            if not pat.test(peek()):
                if greedy:
                    break
                return ans
            ans += next()
        nval = parseInt(ans, base)
        if nval > maxval:
            return ans
        return nval

    def read_escape_sequence():
        # Called with the cursor just after a backslash, returns the text
        # the escape sequence expands to
        nonlocal i
        q = next()
        if not q or q is '\\':
            return '\\'
        if '"\''.indexOf(q) is not -1:
            return q
        if _ASCII_CONTROL_CHARS.hasOwnProperty(q):
            return String.fromCharCode(_ASCII_CONTROL_CHARS[q])
        if '0' <= q <= '9':
            # numeric back-reference such as \1
            ans = read_digits(Number.MAX_VALUE, _NUM_PAT, 10, Number.MAX_VALUE, q)
            if type(ans) is 'number':
                return groups[ans] or ''
            return '\\' + ans
        if q is 'g':
            m = _GROUP_PAT.exec(repl[i:])
            # _GROUP_PAT is not anchored, so only accept a <...> that starts
            # directly after the \g. Otherwise a <...> further along in the
            # template would be picked up and the cursor would be advanced
            # by the wrong amount.
            if m is not None and m.index is 0:
                i += m[0].length
                gn = m[1]
                if isNaN(parseInt(gn, 10)):
                    if not Object.prototype.hasOwnProperty.call(group_name_map, gn):
                        return ''
                    gn = group_name_map[gn][-1]
                return groups[gn] or ''
        if q is 'x':
            code = read_digits(2, _HEX_PAT, 16, 0x10FFFF)
            if type(code) is 'number':
                return String.fromCharCode(code)
            return '\\x' + code
        if q is 'u':
            code = read_digits(4, _HEX_PAT, 16, 0x10FFFF)
            if type(code) is 'number':
                return String.fromCharCode(code)
            return '\\u' + code
        if q is 'U':
            code = read_digits(8, _HEX_PAT, 16, 0x10FFFF)
            if type(code) is 'number':
                if code <= 0xFFFF:
                    return String.fromCharCode(code)
                # encode as a UTF-16 surrogate pair
                code -= 0x10000
                return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))
            return '\\U' + code
        if q is 'N' and peek() is '{':
            next()
            name = ''
            # Stop at the end of the template: peek() is undefined there and
            # _NAME_PAT.test(undefined) tests the text "undefined", which
            # matches, so without the bounds check this loop never ends.
            while i < repl.length and _NAME_PAT.test(peek()):
                name += next()
            if peek() is not '}':
                return '\\N{' + name
            next()
            key = (name or '').toLowerCase()
            if not name or not Object.prototype.hasOwnProperty.call(_ALIAS_MAP, key):
                return '\\N{' + name + '}'
            code = _ALIAS_MAP[key]
            if code <= 0xFFFF:
                return String.fromCharCode(code)
            # encode as a UTF-16 surrogate pair
            code -= 0x10000
            return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))

        return '\\' + q

    ans = ch = ''
    while (ch = next()):
        if ch is '\\':
            ans += read_escape_sequence()
        else:
            ans += ch
    return ans
|
||
|
||
def transform_regex(source, flags):
    # Translate a Python style regular expression into JavaScript syntax.
    #
    # source is the pattern text and flags the initial flag bits (may be
    # falsy). Returns the JavaScript pattern text, the flags including any set
    # inline via (?aiLmsux), and a map from group name to the list of group
    # numbers defined with that name. Raises SyntaxError for constructs that
    # are malformed or cannot be expressed in JavaScript.
    pos = 0
    previous_backslash = in_class = False
    ans = ''
    group_map = {}
    flags = flags or 0
    group_count = 0

    while pos < source.length:
        ch = v'source[pos++]'
        if previous_backslash:
            ans += '\\' + ch
            previous_backslash = False
            continue

        # Escapes have to be recognised inside character classes as well,
        # otherwise the ] of [\]] would be taken as the end of the class and
        # the characters after it would be treated as being outside the class
        if ch is '\\':
            previous_backslash = True
            continue

        if in_class:
            if ch is ']':
                in_class = False
            ans += ch
            continue

        if ch is '[':
            in_class = True
            if source[pos] is ']':  # in python the empty set is not allowed, instead []] is the same as [\]]
                pos += 1
                ch = r'[\]'
        elif ch is '(':
            if source[pos] is '?':
                extension = source[pos + 1]
                if extension is '#':
                    # (?#...) is a comment, drop it
                    close = source.indexOf(')', pos + 1)
                    if close is -1:
                        # SyntaxError, like every other malformed construct,
                        # so that callers catching the module's error see it
                        raise SyntaxError('Expecting a closing )')
                    pos = close + 1
                    continue
                if 'aiLmsux'.indexOf(extension) is not -1:
                    # (?flags) sets flags for the whole pattern
                    flag_map = {'a':ASCII, 'i':IGNORECASE, 'L':LOCALE, 'm':MULTILINE, 's':DOTALL, 'u':UNICODE, 'x':VERBOSE}
                    close = source.indexOf(')', pos + 1)
                    if close is -1:
                        raise SyntaxError('Expecting a closing )')
                    flgs = source[pos+1:close]
                    for v'var i = 0; i < flgs.length; i++':
                        q = flgs[i]  # noqa:undef
                        if not flag_map.hasOwnProperty(q):
                            raise SyntaxError('Invalid flag: ' + q)
                        flags |= flag_map[q]
                    pos = close + 1
                    continue
                if extension is '<':
                    raise SyntaxError('Look behind assertions are not supported in JavaScript')
                if extension is '(':
                    raise SyntaxError('Group existence assertions are not supported in JavaScript')
                if extension is 'P':
                    pos += 2
                    q = source[pos]
                    if q is '<':
                        # (?P<name>...) becomes a plain numbered group whose
                        # number is recorded under its name
                        close = source.indexOf('>', pos)
                        if close is -1:
                            raise SyntaxError('Named group not closed, expecting >')
                        name = source[pos+1:close]
                        if not Object.prototype.hasOwnProperty.call(group_map, name):
                            group_map[name] = v'[]'
                        group_map[name].push(v'++group_count')
                        pos = close + 1
                    elif q is '=':
                        # (?P=name) becomes a numeric back-reference
                        close = source.indexOf(')', pos)
                        if close is -1:
                            raise SyntaxError('Named group back-reference not closed, expecting a )')
                        name = source[pos+1:close]
                        if not isNaN(parseInt(name, 10)):
                            ans += '\\' + name
                        else:
                            if not Object.prototype.hasOwnProperty.call(group_map, name):
                                raise SyntaxError('Invalid back-reference. The named group: ' + name + ' has not yet been defined.')
                            ans += '\\' + group_map[name][-1]
                        pos = close + 1
                        continue
                    else:
                        raise SyntaxError('Expecting < or = after (?P')
            else:
                group_count += 1
        elif ch is '.' and (flags & DOTALL):
            ans += r'[\s\S]'  # JavaScript has no DOTALL
            continue

        ans += ch

    if previous_backslash:
        # Keep a trailing lone backslash so that compiling the result reports
        # the error instead of the backslash silently vanishing
        ans += '\\'

    return ans, flags, group_map
|
||
|
||
class MatchObject:
    # Result of a successful match, wrapping the array returned by
    # RegExp.exec() in an interface modelled on Python's match objects.

    def __init__(self, regex, match, pos, endpos):
        # regex is the RegexObject that produced the match, match the array
        # returned by RegExp.exec(), pos/endpos the search limits that were
        # in effect
        self.re = regex
        self.string = match.input
        self._start_pos = match.index
        self._groups = match
        self.pos, self.endpos = pos, endpos

    def _compute_extents(self):
        # compute start/end for each group
        # exec() only reports the position of the whole match, so the position
        # of each group is estimated by searching for its text inside the
        # text of the whole match.
        match = self._groups
        self._start = v'Array(match.length)'
        self._end = v'Array(match.length)'
        self._start[0] = self._start_pos
        self._end[0] = self._start_pos + match[0].length
        offset = self._start_pos
        extent = match[0]
        loc = 0
        for v'var i = 1; i < match.length; i++':
            g = match[i]
            if g is undefined or g is None:
                # The group did not take part in the match. Leave its slots
                # empty so that start()/end() report -1, and leave loc alone
                # so the search for the following groups is not reset.
                continue
            loc = extent.indexOf(g, loc)
            if loc is -1:
                self._start[i] = self._start[i-1]
                self._end[i] = self._end[i-1]
            else:
                self._start[i] = offset + loc
                loc += g.length
                self._end[i] = offset + loc  # noqa:undef

    def groups(self, defval=None):
        # Return the text of all groups from 1 upwards, with defval in place
        # of groups that did not take part in the match
        ans = v'[]'
        for v'var i = 1; i < self._groups.length; i++':
            val = self._groups[i]  # noqa:undef
            if val is undefined:
                val = defval
            ans.push(val)
        return ans

    def _group_number(self, g):
        # Map a group name to the number of the last group defined with that
        # name. Numbers and unknown names are returned unchanged.
        if type(g) is 'number':
            return g
        if Object.prototype.hasOwnProperty.call(self.re.group_name_map, g):
            return self.re.group_name_map[g][-1]
        return g

    def _group_val(self, q, defval):
        # Return the text of the group identified by number or name q, or
        # defval when there is no such group or it did not match
        val = undefined
        if type(q) is 'number' and -1 < q < self._groups.length:
            val = self._groups[q]
        else:
            if Object.prototype.hasOwnProperty.call(self.re.group_name_map, q):
                val = self._groups[self.re.group_name_map[q][-1]]
        if val is undefined:
            val = defval
        return val

    def group(self):
        # With no arguments return the whole match, with one argument the
        # text of that group, with several arguments an array of group texts.
        # Groups that are unknown or did not match yield None.
        if arguments.length is 0:
            return self._groups[0]
        ans = v'[]'
        for v'var i = 0; i < arguments.length; i++':
            q = arguments[i]  # noqa:undef
            ans.push(self._group_val(q, None))
        return ans[0] if ans.length is 1 else ans

    def start(self, g):
        # Start index of group g (the whole match when g is omitted), or -1
        # when no position is known for the group
        if self._start is undefined:
            self._compute_extents()
        val = self._start[self._group_number(g or 0)]
        if val is undefined:
            val = -1
        return val

    def end(self, g):
        # End index of group g (the whole match when g is omitted), or -1
        # when no position is known for the group
        if self._end is undefined:
            self._compute_extents()
        val = self._end[self._group_number(g or 0)]
        if val is undefined:
            val = -1
        return val

    def span(self, g):
        # [start, end] of group g
        return [self.start(g), self.end(g)]

    def expand(self, repl):
        # Expand the replacement template repl using the groups of this match.
        # _expand() takes the groups first and the template second.
        return _expand(self._groups, repl, self.re.group_name_map)

    def groupdict(self, defval=None):
        # Return an object mapping every group name to the text of the last
        # group defined with that name, or defval if that group did not match
        gnm = self.re.group_name_map
        names = Object.keys(gnm)
        ans = {}
        for v"var i = 0; i < names.length; i++":
            name = names[i]  # noqa:undef
            if Object.prototype.hasOwnProperty.call(gnm, name):
                val = self._groups[gnm[name][-1]]
                if val is undefined:
                    val = defval
                ans[name] = val
        return ans

    def captures(self, group_name):
        # Return the texts of all groups defined with the name group_name
        # that took part in the match (empty for unknown names)
        ans = []
        if not Object.prototype.hasOwnProperty.call(self.re.group_name_map, group_name):
            return ans
        groups = self.re.group_name_map[group_name]
        for v'var i = 0; i < groups.length; i++':
            val = self._groups[groups[i]]  # noqa:undef
            if val is not undefined:
                ans.push(val)
        return ans

    def capturesdict(self):
        # Return an object mapping every group name to its captures()
        gnm = self.re.group_name_map
        names = Object.keys(gnm)
        ans = {}
        for v'var i = 0; i < names.length; i++':
            name = names[i]  # noqa:undef
            ans[name] = self.captures(name)
        return ans
|
||
|
||
class RegexObject:
    # A compiled pattern: a Python style regular expression translated to a
    # JavaScript RegExp, with an interface modelled on Python's pattern objects.

    def __init__(self, pattern, flags):
        # pattern is either the pattern text or a RegExp (whose source is
        # used), flags a combination of the module level flag bits
        self.pattern = pattern.source if isinstance(pattern, RegExp) else pattern
        self.js_pattern, self.flags, self.group_name_map = transform_regex(self.pattern, flags)

        modifiers = ''
        if self.flags & IGNORECASE: modifiers += 'i'
        if self.flags & MULTILINE: modifiers += 'm'
        if not (self.flags & ASCII) and supports_unicode:
            modifiers += 'u'
        self._modifiers = modifiers + 'g'
        self._pattern = RegExp(self.js_pattern, self._modifiers)

    def _step_past_empty_match(self, pat, string, m):
        # exec() leaves lastIndex where it was after a zero-length match, so
        # calling it again would return the same match forever. Move lastIndex
        # one character forward (one whole surrogate pair for unicode
        # patterns) so that scanning makes progress.
        if m[0].length is not 0:
            return
        step = 1
        if pat.unicode:
            hi = string.charCodeAt(m.index)
            lo = string.charCodeAt(m.index + 1)
            if 0xD800 <= hi <= 0xDBFF and 0xDC00 <= lo <= 0xDFFF:
                step = 2
        pat.lastIndex = m.index + step

    def _do_search(self, pat, string, pos, endpos):
        # Return a MatchObject for the first match of pat in string that
        # starts at or after pos, or None. If endpos is not None only
        # string[:endpos] is searched.
        pat.lastIndex = 0
        if endpos is not None:
            string = string[:endpos]
        while True:
            n = pat.exec(string)
            if n is None:
                return None
            if n.index >= pos:
                return MatchObject(self, n, pos, endpos)
            self._step_past_empty_match(pat, string, n)

    def search(self, string, pos=0, endpos=None):
        # Find the first match anywhere in string
        return self._do_search(self._pattern, string, pos, endpos)

    def match(self, string, pos=0, endpos=None):
        # Find a match anchored with ^. The pattern is wrapped in a
        # non-capturing group so that the anchor applies to every alternative
        # of a pattern such as a|b, not just the first one. The wrapper does
        # not change group numbering.
        return self._do_search(RegExp('^(?:' + self.js_pattern + ')', self._modifiers), string, pos, endpos)

    def split(self, string, maxsplit=0):
        # Split string on the pattern. maxsplit is handed to JavaScript's
        # String.prototype.split() as its limit argument, which caps the
        # number of items returned.
        self._pattern.lastIndex = 0
        return string.split(self._pattern, maxsplit or undefined)

    def findall(self, string):
        # Return the list of all matched substrings, as produced by
        # String.prototype.match() with a global pattern
        self._pattern.lastIndex = 0
        return ρσ_list_decorate(string.match(self._pattern) or v'[]')

    def finditer(self, string):
        # Return an iterator that yields a MatchObject for every match
        pat = RegExp(self._pattern.source, self._modifiers)  # We have to do this since lastIndex is mutable
        return {
            '_string':string,
            '_r': pat,
            '_self': self,
            ρσ_iterator_symbol: def (): return this;,
            'next': def ():
                m = this._r.exec(this._string)
                if m is None:
                    return {'done':True}
                # without this a zero-length match would be yielded forever
                this._self._step_past_empty_match(this._r, this._string, m)
                return {'done':False, 'value':MatchObject(this._self, m, 0, None)}
        }

    def subn(self, repl, string, count=0):
        # Replace matches of the pattern in string and return the new string
        # together with the number of replacements made. repl is either a
        # replacement template or a function that is called with a
        # MatchObject and returns the replacement. A count below 1 means
        # replace all matches.
        expand = _expand
        if type(repl) is 'function':
            expand = def(m, repl, gnm): return '' + repl(MatchObject(self, m, 0, None))
        pat = self._pattern
        pat.lastIndex = 0
        num = 0
        matches = v'[]'

        while count < 1 or num < count:
            m = pat.exec(string)
            if m is None:
                break
            matches.push(m)
            num += 1
            # without this a zero-length match would be collected forever
            self._step_past_empty_match(pat, string, m)

        # Replace from the last match backwards so that the indices of the
        # matches still to be processed stay valid
        for v'var i = matches.length - 1; i > -1; i--':
            m = matches[i]  # noqa:undef
            start = m.index
            end = start + m[0].length
            string = string[:start] + expand(m, repl, self.group_name_map) + string[end:]
        return string, matches.length

    def sub(self, repl, string, count=0):
        # Like subn() but return only the new string
        return self.subn(repl, string, count)[0]
|
||
|
||
def _get_from_cache(pattern, flags):
    # Return the RegexObject for pattern and flags, compiling and caching it
    # on first use. The cache keeps at most 100 entries and drops the entry
    # that was added first when it is full.
    source = pattern.source if isinstance(pattern, RegExp) else pattern
    cache_key = JSON.stringify(v'[source, flags]')
    if not Object.prototype.hasOwnProperty.call(_re_cache_map, cache_key):
        if _re_cache_items.length >= 100:
            v'delete _re_cache_map[_re_cache_items.shift()]'
        _re_cache_map[cache_key] = RegexObject(source, flags)
        _re_cache_items.push(cache_key)
    return _re_cache_map[cache_key]
|
||
|
||
def compile(pattern, flags=0):
    # Return the (cached) RegexObject for pattern and flags
    regex = _get_from_cache(pattern, flags)
    return regex
|
||
|
||
def search(pattern, string, flags=0):
    # Search string for pattern, see RegexObject.search()
    regex = _get_from_cache(pattern, flags)
    return regex.search(string)
|
||
|
||
def match(pattern, string, flags=0):
    # Match pattern against string, see RegexObject.match()
    regex = _get_from_cache(pattern, flags)
    return regex.match(string)
|
||
|
||
def split(pattern, string, maxsplit=0, flags=0):
    # Split string on pattern, see RegexObject.split(). maxsplit is forwarded
    # to the compiled pattern instead of being silently ignored.
    return _get_from_cache(pattern, flags).split(string, maxsplit)
|
||
|
||
def findall(pattern, string, flags=0):
    # Return all matches of pattern in string, see RegexObject.findall()
    regex = _get_from_cache(pattern, flags)
    return regex.findall(string)
|
||
|
||
def finditer(pattern, string, flags=0):
    # Iterate over the matches of pattern in string, see RegexObject.finditer()
    regex = _get_from_cache(pattern, flags)
    return regex.finditer(string)
|
||
|
||
def sub(pattern, repl, string, count=0, flags=0):
    # Replace matches of pattern in string, see RegexObject.sub()
    regex = _get_from_cache(pattern, flags)
    return regex.sub(repl, string, count)
|
||
|
||
def subn(pattern, repl, string, count=0, flags=0):
    # Replace matches of pattern in string and also report how many
    # replacements were made, see RegexObject.subn()
    regex = _get_from_cache(pattern, flags)
    return regex.subn(repl, string, count)
|
||
|
||
def escape(string):
    # Return string with every character matched by _RE_ESCAPE prefixed by a
    # backslash
    escaped = string.replace(_RE_ESCAPE, '\\$&')
    return escaped
|
||
|
||
def purge():
    # Empty the cache of compiled patterns by replacing both the key list
    # and the lookup map with fresh, empty containers
    nonlocal _re_cache_map, _re_cache_items
    _re_cache_items = v'[]'
    _re_cache_map = {}
|