4242
4343import static com .oracle .graal .python .runtime .exception .PythonErrorType .RuntimeError ;
4444import static com .oracle .graal .python .runtime .exception .PythonErrorType .TypeError ;
45+ import static com .oracle .graal .python .runtime .exception .PythonErrorType .ValueError ;
4546
4647import java .io .UnsupportedEncodingException ;
4748import java .util .List ;
48- import java .util .regex .Matcher ;
4949import java .util .regex .Pattern ;
5050
5151import com .oracle .graal .python .builtins .Builtin ;
7878import com .oracle .truffle .api .interop .UnsupportedTypeException ;
7979import com .oracle .truffle .api .nodes .Node ;
8080import com .oracle .truffle .api .profiles .BranchProfile ;
81+ import com .oracle .truffle .regex .RegexSyntaxException ;
8182
8283@ CoreFunctions (defineModule = "_sre" )
8384public class SREModuleBuiltins extends PythonBuiltins {
@@ -86,106 +87,6 @@ protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFa
8687 return SREModuleBuiltinsFactory .getFactories ();
8788 }
8889
89- @ Builtin (name = "tregex_preprocess_for_verbose" , fixedNumOfPositionalArgs = 1 )
90- @ GenerateNodeFactory
91- abstract static class TRegexPreprocessVerboseNode extends PythonUnaryBuiltinNode {
92-
93- @ Specialization
94- Object run (PString str ) {
95- return run (str .getValue ());
96- }
97-
98- @ Specialization
99- Object run (String str ) {
100- return replaceAll (str );
101- }
102-
103- /**
104- * removes comments and whitespaces if they are not in a character class
105- */
106- @ TruffleBoundary (transferToInterpreterOnException = false , allowInlining = true )
107- private static String replaceAll (String r ) {
108- StringBuffer sb = new StringBuffer (r );
109- int charclassNestingLevel = 0 ;
110- boolean inComment = false ;
111- for (int i = 0 ; i < sb .length ();) {
112- char c = sb .charAt (i );
113- if (c == '[' && !inComment ) {
114- charclassNestingLevel ++;
115- } else if (c == ']' && !inComment ) {
116- charclassNestingLevel --;
117- } else if (c == '#' && charclassNestingLevel == 0 ) {
118- inComment = true ;
119- } else if (c == '\n' && inComment ) {
120- inComment = false ;
121- }
122- if (inComment || (Character .isWhitespace (c ) && charclassNestingLevel == 0 )) {
123- sb .deleteCharAt (i );
124- } else {
125- i ++;
126- }
127- }
128-
129- for (int idx = sb .indexOf ("\\ Z" ); idx != -1 ; idx = sb .indexOf ("\\ Z" , idx + 2 )) {
130- sb .replace (idx , idx + 2 , "$" );
131- }
132-
133- return sb .toString ();
134- }
135-
136- @ Fallback
137- Object run (Object o ) {
138- throw raise (PythonErrorType .TypeError , "expected string, not %p" , o );
139- }
140-
141- }
142-
143- @ Builtin (name = "tregex_preprocess_default" , fixedNumOfPositionalArgs = 1 )
144- @ GenerateNodeFactory
145- abstract static class TRegexPreprocessDefaultNode extends PythonUnaryBuiltinNode {
146- @ CompilationFinal private Pattern namedCaptGroupPattern ;
147-
148- @ Specialization
149- Object run (PString str ) {
150- return run (str .getValue ());
151- }
152-
153- @ Specialization
154- Object run (String str ) {
155- if (namedCaptGroupPattern == null ) {
156- CompilerDirectives .transferToInterpreterAndInvalidate ();
157- namedCaptGroupPattern = Pattern .compile ("\\ ?P\\ <(?<GRPNAME>\\ w*)\\ >" );
158- }
159- return replaceAll (str );
160- }
161-
162- /**
163- * replaces named capturing groups {@code ?P<name>} by {@code ?<name>} and replaces
164- * end-of-string {@code \Z} by {@code $}.
165- */
166- @ TruffleBoundary (transferToInterpreterOnException = false , allowInlining = true )
167- private String replaceAll (String r ) {
168- Matcher matcher0 = namedCaptGroupPattern .matcher (r );
169- StringBuffer sb = new StringBuffer ();
170- while (matcher0 .find ()) {
171- matcher0 .appendReplacement (sb , "?<" + matcher0 .group ("GRPNAME" ) + ">" );
172- }
173- matcher0 .appendTail (sb );
174-
175- for (int idx = sb .indexOf ("\\ Z" ); idx != -1 ; idx = sb .indexOf ("\\ Z" , idx + 2 )) {
176- sb .replace (idx , idx + 2 , "$" );
177- }
178-
179- return sb .toString ();
180- }
181-
182- @ Fallback
183- Object run (Object o ) {
184- throw raise (PythonErrorType .TypeError , "expected string, not %p" , o );
185- }
186-
187- }
188-
18990 /**
19091 * Replaces any <it>quoted</it> escape sequence like {@code "\\n"} (two characters; backslash +
19192 * 'n') by its single character like {@code "\n"} (one character; newline).
@@ -257,45 +158,63 @@ private SequenceStorageNodes.ToByteArrayNode getToByteArrayNode() {
257158
258159 }
259160
260- @ Builtin (name = "tregex_call_safe " , fixedNumOfPositionalArgs = 3 )
161+ @ Builtin (name = "tregex_call_compile " , fixedNumOfPositionalArgs = 3 )
261162 @ TypeSystemReference (PythonArithmeticTypes .class )
262163 @ GenerateNodeFactory
263- abstract static class TRegexCallSafe extends PythonBuiltinNode {
164+ abstract static class TRegexCallCompile extends PythonBuiltinNode {
264165
265- private Object doIt (TruffleObject callable , String arg1 , Object arg2 ,
266- BranchProfile runtimeError ,
267- BranchProfile typeError , Node invokeNode ) {
166+ @ Specialization (guards = "isForeignObject(callable)" )
167+ Object call (TruffleObject callable , Object arg1 , Object arg2 ,
168+ @ Cached ("create()" ) BranchProfile syntaxError ,
169+ @ Cached ("create()" ) BranchProfile typeError ,
170+ @ Cached ("createExecute()" ) Node invokeNode ) {
268171 try {
269172 return ForeignAccess .sendExecute (invokeNode , callable , new Object []{arg1 , arg2 });
270173 } catch (ArityException | UnsupportedTypeException | UnsupportedMessageException e ) {
271174 typeError .enter ();
272175 throw raise (TypeError , "%s" , e );
273- } catch (RuntimeException e ) {
274- runtimeError .enter ();
275- throw raise (RuntimeError , "%s" , e );
176+ } catch (RegexSyntaxException e ) {
177+ syntaxError .enter ();
178+ if (e .getPosition () == -1 ) {
179+ throw raise (ValueError , "%s" , e .getReason ());
180+ } else {
181+ throw raise (ValueError , "%s at position %d" , e .getReason (), e .getPosition ());
182+ }
276183 }
277184 }
278185
279- @ Specialization (guards = "isForeignObject(callable)" )
280- Object call (TruffleObject callable , String arg1 , String arg2 ,
281- @ Cached ("create()" ) BranchProfile runtimeError ,
282- @ Cached ("create()" ) BranchProfile typeError ,
283- @ Cached ("createExecute()" ) Node invokeNode ) {
284- return doIt (callable , arg1 , arg2 , runtimeError , typeError , invokeNode );
186+ @ SuppressWarnings ("unused" )
187+ @ Fallback
188+ Object call (Object callable , Object arg1 , Object arg2 ) {
189+ throw raise (RuntimeError , "invalid arguments passed to tregex_call_compile" );
190+ }
191+
192+ protected static Node createExecute () {
193+ return Message .EXECUTE .createNode ();
285194 }
195+ }
196+
197+ @ Builtin (name = "tregex_call_exec" , fixedNumOfPositionalArgs = 3 )
198+ @ TypeSystemReference (PythonArithmeticTypes .class )
199+ @ GenerateNodeFactory
200+ abstract static class TRegexCallExec extends PythonBuiltinNode {
286201
287202 @ Specialization (guards = "isForeignObject(callable)" )
288- Object call (TruffleObject callable , String arg1 , int arg2 ,
289- @ Cached ("create()" ) BranchProfile runtimeError ,
203+ Object call (TruffleObject callable , Object arg1 , Number arg2 ,
290204 @ Cached ("create()" ) BranchProfile typeError ,
291205 @ Cached ("createExecute()" ) Node invokeNode ) {
292- return doIt (callable , arg1 , arg2 , runtimeError , typeError , invokeNode );
206+ try {
207+ return ForeignAccess .sendExecute (invokeNode , callable , new Object []{arg1 , arg2 });
208+ } catch (ArityException | UnsupportedTypeException | UnsupportedMessageException e ) {
209+ typeError .enter ();
210+ throw raise (TypeError , "%s" , e );
211+ }
293212 }
294213
295214 @ SuppressWarnings ("unused" )
296215 @ Fallback
297216 Object call (Object callable , Object arg1 , Object arg2 ) {
298- throw raise (RuntimeError );
217+ throw raise (RuntimeError , "invalid arguments passed to tregex_call_exec" );
299218 }
300219
301220 protected static Node createExecute () {
0 commit comments