3838# SOFTWARE.
3939
4040from mmap import mmap
41+ from array import array
4142
4243_mappingproxy = type (type .__dict__ )
4344
@@ -76,7 +77,7 @@ def __init__(self, compiled_pattern, flags, sticky):
7677 self .__compiled_pattern__ = compiled_pattern
7778 self .__sticky__ = sticky
7879 self .pattern = compiled_pattern .pattern
79- self .flags = flags
80+ self .flags = { name : bool ( flags & flag ) for flag , name in FLAG_NAMES }
8081 self .groupCount = 1 + compiled_pattern .groups
8182 self .groups = _NamedCaptureGroups (compiled_pattern .groupindex )
8283
@@ -110,7 +111,7 @@ def fallback_compiler(pattern, flags):
110111
111112 compiled_pattern = _sre_compile (pattern , bit_flags )
112113
113- return _ExecutablePattern (compiled_pattern , flags , sticky )
114+ return _ExecutablePattern (compiled_pattern , bit_flags , sticky )
114115
115116def _new_compile (p , flags = 0 ):
116117 if _with_tregex and isinstance (p , (str , bytes )):
@@ -145,19 +146,19 @@ def setup(sre_compiler, error_class, flags_table):
145146FLAG_DEBUG = 128
146147FLAG_ASCII = 256
147148FLAG_NAMES = [
148- (FLAG_TEMPLATE , "re. TEMPLATE" ),
149- (FLAG_IGNORECASE , "re. IGNORECASE" ),
150- (FLAG_LOCALE , "re. LOCALE" ),
151- (FLAG_MULTILINE , "re. MULTILINE" ),
152- (FLAG_DOTALL , "re. DOTALL" ),
153- (FLAG_UNICODE , "re. UNICODE" ),
154- (FLAG_VERBOSE , "re. VERBOSE" ),
155- (FLAG_DEBUG , "re. DEBUG" ),
156- (FLAG_ASCII , "re. ASCII" ),
149+ (FLAG_TEMPLATE , "TEMPLATE" ),
150+ (FLAG_IGNORECASE , "IGNORECASE" ),
151+ (FLAG_LOCALE , "LOCALE" ),
152+ (FLAG_MULTILINE , "MULTILINE" ),
153+ (FLAG_DOTALL , "DOTALL" ),
154+ (FLAG_UNICODE , "UNICODE" ),
155+ (FLAG_VERBOSE , "VERBOSE" ),
156+ (FLAG_DEBUG , "DEBUG" ),
157+ (FLAG_ASCII , "ASCII" ),
157158]
158159
159160
160- class SRE_Match ():
161+ class Match ():
161162 def __init__ (self , pattern , pos , endpos , result , input_str , compiled_regex ):
162163 self .__result = result
163164 self .__compiled_regex = compiled_regex
@@ -191,10 +192,12 @@ def __getitem__(self, item):
191192
192193 def __groupidx (self , idx ):
193194 try :
194- if isinstance (idx , str ):
195+ if hasattr (idx , '__index__' ):
196+ int_idx = int (idx )
197+ if 0 <= int_idx < self .__compiled_regex .groupCount :
198+ return int_idx
199+ else :
195200 return self .__compiled_regex .groups [idx ]
196- elif 0 <= idx < self .__compiled_regex .groupCount :
197- return idx
198201 except Exception :
199202 pass
200203 raise IndexError ("no such group" )
@@ -204,8 +207,10 @@ def __group(self, idx, default=None):
204207 start = self .__result .getStart (idxarg )
205208 if start < 0 :
206209 return default
207- else :
210+ elif isinstance ( self . __input_str , str ) :
208211 return self .__input_str [start :self .__result .getEnd (idxarg )]
212+ else :
213+ return bytes (self .__input_str [start :self .__result .getEnd (idxarg )])
209214
210215 def groupdict (self , default = None ):
211216 groups = self .__compiled_regex .groups
@@ -221,6 +226,14 @@ def start(self, groupnum=0):
221226 idxarg = self .__groupidx (groupnum )
222227 return self .__result .getStart (idxarg )
223228
def expand(self, template):
    """Expand *template* against this match.

    Delegates to the ``re`` module's ``_expand`` helper, passing it the
    object stored in ``self.__re``, this match, and *template*, and returns
    whatever that helper returns.
    """
    import re  # function-scope import, as in the original
    owner = self.__re
    return re._expand(owner, self, template)
232+
@property
def regs(self):
    """Tuple of ``self.span(i)`` for each ``i`` in ``range(groupCount)``."""
    group_total = self.__compiled_regex.groupCount
    spans = [self.span(index) for index in range(group_total)]
    return tuple(spans)
236+
@property
def string(self):
    """The input object stored on this match as ``__input_str``."""
    subject = self.__input_str
    return subject
@@ -252,7 +265,13 @@ def lastindex(self):
252265 return lastindex
253266
254267 def __repr__ (self ):
255- return "<re.Match object; span=%r, match=%r>" % (self .span (), self .group ())
268+ return "<%s object; span=%r, match=%r>" % (type (self ).__name__ , self .span (), self .group ())
269+
270+ def __copy__ (self ):
271+ return self
272+
273+ def __deepcopy__ (self , memo ):
274+ return self
256275
257276def _append_end_assert (pattern ):
258277 if isinstance (pattern , str ):
@@ -261,18 +280,18 @@ def _append_end_assert(pattern):
261280 return pattern if pattern .endswith (rb"\Z" ) else pattern + rb"\Z"
262281
263282def _is_bytes_like (object ):
264- return isinstance (object , (bytes , bytearray , memoryview , mmap ))
283+ return isinstance (object , (bytes , bytearray , memoryview , array , mmap ))
265284
266- class SRE_Pattern ():
285+ class Pattern ():
267286 def __init__ (self , pattern , flags ):
268287 self .__binary = isinstance (pattern , bytes )
269288 self .pattern = pattern
270- self .flags = flags
289+ self .__input_flags = flags
271290 flags_str = []
272- for char ,flag in FLAGS .items ():
291+ for char , flag in FLAGS .items ():
273292 if flags & flag :
274293 flags_str .append (char )
275- self .flags_str = "" .join (flags_str )
294+ self .__flags_str = "" .join (flags_str )
276295 self .__compiled_regexes = {}
277296 compiled_regex = self .__tregex_compile (self .pattern )
278297 self .groups = compiled_regex .groupCount - 1
@@ -283,6 +302,19 @@ def __init__(self, pattern, flags):
283302 group_names = dir (groups )
284303 self .groupindex = _mappingproxy ({name : groups [name ] for name in group_names })
285304
@property
def flags(self):
    """Integer flag mask combining constructor flags with the compiled regex's flags.

    Starts from the flags given at construction and ORs in the bit of every
    ``FLAG_NAMES`` entry whose name is reported as truthy by the compiled
    regex's ``flags`` mapping. Names the mapping lacks are skipped.
    """
    # Flags can be specified both in the flags argument and inline in the
    # regex, so the effective ones are read back from the compiled regex.
    combined = self.__input_flags
    compiled_flags = self.__tregex_compile(self.pattern).flags
    for bit, flag_name in FLAG_NAMES:
        try:
            if compiled_flags[flag_name]:
                combined |= bit
        except KeyError:
            # The compiled regex does not report this flag at all.
            continue
    return combined
317+
286318 def __check_input_type (self , input ):
287319 if not isinstance (input , str ) and not _is_bytes_like (input ):
288320 raise TypeError ("expected string or bytes-like object" )
@@ -298,7 +330,7 @@ def __check_pos(pos):
298330
299331 def __tregex_compile (self , pattern , flags = None ):
300332 if flags is None :
301- flags = self .flags_str
333+ flags = self .__flags_str
302334 if (pattern , flags ) not in self .__compiled_regexes :
303335 try :
304336 self .__compiled_regexes [(pattern , flags )] = tregex_compile_internal (pattern , flags , fallback_compiler )
@@ -317,7 +349,7 @@ def __repr__(self):
317349 for code , name in FLAG_NAMES :
318350 if flags & code :
319351 flags -= code
320- flag_items .append (name )
352+ flag_items .append (f're. { name } ' )
321353 if flags != 0 :
322354 flag_items .append ("0x%x" % flags )
323355 if len (flag_items ) == 0 :
@@ -331,15 +363,21 @@ def __repr__(self):
331363 def __eq__ (self , other ):
332364 if self is other :
333365 return True
334- if type (other ) != SRE_Pattern :
366+ if type (other ) != Pattern :
335367 return NotImplemented
336368 return self .pattern == other .pattern and self .flags == other .flags
337369
338370 def __hash__ (self ):
339371 return hash (self .pattern ) * 31 ^ hash (self .flags )
340372
373+ def __copy__ (self ):
374+ return self
375+
376+ def __deepcopy__ (self , memo ):
377+ return self
378+
341379 def _search (self , pattern , string , pos , endpos , sticky = False ):
342- pattern = self .__tregex_compile (pattern , self .flags_str + ("y" if sticky else "" ))
380+ pattern = self .__tregex_compile (pattern , self .__flags_str + ("y" if sticky else "" ))
343381 input_str = string
344382 if endpos == - 1 or endpos >= len (string ):
345383 endpos = len (string )
@@ -348,7 +386,7 @@ def _search(self, pattern, string, pos, endpos, sticky=False):
348386 input_str = string [:endpos ]
349387 result = tregex_call_exec (pattern .exec , input_str , min (pos , endpos % len (string ) + 1 ))
350388 if result .isMatch :
351- return SRE_Match (self , pos , endpos , result , input_str , pattern )
389+ return Match (self , pos , endpos , result , input_str , pattern )
352390 else :
353391 return None
354392
@@ -389,7 +427,7 @@ def finditer(self, string, pos=0, endpos=-1):
389427 if not result .isMatch :
390428 break
391429 else :
392- yield SRE_Match (self , pos , endpos , result , string , compiled_regex )
430+ yield Match (self , pos , endpos , result , string , compiled_regex )
393431 no_progress = (result .getStart (0 ) == result .getEnd (0 ))
394432 pos = result .getEnd (0 ) + no_progress
395433 return
@@ -411,7 +449,7 @@ def findall(self, string, pos=0, endpos=-1):
411449 elif compiled_regex .groupCount == 2 :
412450 matchlist .append (self .__sanitize_out_type (string [result .getStart (1 ):result .getEnd (1 )]))
413451 else :
414- matchlist .append (tuple (map (self .__sanitize_out_type , SRE_Match (self , pos , endpos , result , string , compiled_regex ).groups ())))
452+ matchlist .append (tuple (map (self .__sanitize_out_type , Match (self , pos , endpos , result , string , compiled_regex ).groups ())))
415453 no_progress = (result .getStart (0 ) == result .getEnd (0 ))
416454 pos = result .getEnd (0 ) + no_progress
417455 return matchlist
@@ -433,11 +471,10 @@ def subn(self, repl, string, count=0):
433471 else :
434472 literal = b'\\ ' not in repl
435473 if not literal :
436- import sre_parse
437- template = sre_parse .parse_template (repl , self )
438-
439- def repl (match ):
440- return sre_parse .expand_template (template , match )
474+ import re
475+ repl = re ._subx (self , repl )
476+ if not callable (repl ):
477+ literal = True
441478
442479 while (count == 0 or n < count ) and pos <= len (string ):
443480 match_result = tregex_call_exec (pattern .exec , string , pos )
@@ -450,7 +487,7 @@ def repl(match):
450487 if literal :
451488 result .append (repl )
452489 else :
453- _srematch = SRE_Match (self , pos , - 1 , match_result , string , pattern )
490+ _srematch = Match (self , pos , - 1 , match_result , string , pattern )
454491 _repl = repl (_srematch )
455492 result .append (_repl )
456493 pos = end
@@ -492,8 +529,37 @@ def split(self, string, maxsplit=0):
492529 result .append (self .__sanitize_out_type (string [collect_pos :]))
493530 return result
494531
def scanner(self, string, pos=0, endpos=None):
    """Create an ``SREScanner`` over *string* for this pattern.

    *pos* and *endpos* are handed to the scanner as its start and end
    positions without modification.
    """
    return SREScanner(pattern=self, string=string, start=pos, end=endpos)
534+
535+
class SREScanner(object):
    """Stateful helper that applies a pattern repeatedly to one string.

    Each call to :meth:`match` or :meth:`search` starts at the position
    left behind by the previous call, so successive calls walk through the
    string.

    Attributes:
        pattern: the pattern object whose ``match``/``search`` are invoked.
    """

    def __init__(self, pattern, string, start, end):
        """Remember the pattern, the subject string and the scan window.

        Args:
            pattern: object providing ``match`` and ``search`` methods that
                accept ``(string, pos[, endpos])``.
            string: the subject to scan.
            start: position at which the first attempt begins.
            end: end position forwarded to the pattern, or ``None`` to let
                the pattern use its own default end position.
        """
        self.pattern = pattern
        self._string = string
        self._start = start
        self._end = end

    def _match_search(self, matcher):
        """Run *matcher* at the current position and advance the cursor.

        Returns the matcher's result, or ``None`` once the cursor has moved
        past the end of the string.
        """
        if self._start > len(self._string):
            return None
        if self._end is None:
            # No explicit end was requested: omit the argument so the
            # pattern applies its own default instead of receiving None,
            # which integer end-position handling cannot process.
            match = matcher(self._string, self._start)
        else:
            match = matcher(self._string, self._start, self._end)
        if match is None:
            self._start += 1
        else:
            self._start = match.end()
            if match.start() == self._start:
                # Zero-width match: step forward so the scan makes progress.
                self._start += 1
        return match

    def match(self):
        """Try the pattern's ``match`` at the current position."""
        return self._match_search(self.pattern.match)

    def search(self):
        """Try the pattern's ``search`` from the current position."""
        return self._match_search(self.pattern.search)
560+
495561
496- _t_compile = SRE_Pattern
562+ _t_compile = Pattern
497563
498564def compile (pattern , flags , code , groups , groupindex , indexgroup ):
499565 import _cpython_sre
0 commit comments