@@ -62,6 +62,11 @@ public class FormatStringLiteralNode extends LiteralNode {
6262 static final String ERROR_MESSAGE_UNTERMINATED_STRING = "f-string: unterminated string" ;
6363 static final String ERROR_MESSAGE_INVALID_SYNTAX = "f-string: invalid syntax" ;
6464 static final String ERROR_MESSAGE_BACKSLASH_IN_EXPRESSION = "f-string expression part cannot include a backslash" ;
65+ static final String ERROR_MESSAGE_HASH_IN_EXPRESSION = "f-string expression part cannot include '#'" ;
66+ static final String ERROR_MESSAGE_CLOSING_PAR_DOES_NOT_MATCH = "f-string: closing parenthesis '%c' does not match opening parenthesis '%c'" ;
67+ static final String ERROR_MESSAGE_UNMATCHED_PAR = "f-string: unmatched '%c'" ;
68+ static final String ERROR_MESSAGE_TOO_MANY_NESTED_PARS = "f-string: too many nested parenthesis" ;
69+ static final String ERROR_MESSAGE_EXPECTING_CLOSING_BRACE = "f-string: expecting '}'" ;
6570
6671 private static final String EMPTY_STRING = "" ;
6772
@@ -145,7 +150,7 @@ private static String getText(StringBuilder result) {
145150
146151 private void parse (VirtualFrame frame ) {
147152 // create tokens
148- tokens = createTokens (this , values , true );
153+ tokens = createTokens (this , values );
149154 // create sources from tokens, that marks expressions
150155 String [] expressionSources = createExpressionSources (values , tokens , 0 , tokens .length );
151156 // and create the expressions
@@ -243,6 +248,8 @@ protected static String[] createExpressionSources(StringPart[] values, int[][] t
243248 private static final int STATE_EXPRESSION = 5 ; // in {}
244249 private static final int STATE_UNKNOWN = 6 ;
245250
251+ private static final int MAX_PAR_NESTING = 200 ;
252+
246253 // protected for testing
247254 /**
248255 * This is the parser of the fstring. As result is a list of tokens, when a token is int array
@@ -256,17 +263,16 @@ protected static String[] createExpressionSources(StringPart[] values, int[][] t
256263 *
257264 * @param node it's needed for raising syntax errors
258265 * @param values this part of text will be parsed
259- * @param topLevel if there is called recursion on topLevel = false, then the syntax error is
260- * raised
261266 * @return a list of tokens
262267 */
263- protected static int [][] createTokens (FormatStringLiteralNode node , StringPart [] values , boolean topLevel ) {
268+ protected static int [][] createTokens (FormatStringLiteralNode node , StringPart [] values ) {
264269 int index ;
265270 int state = STATE_TEXT ;
266271 int start = 0 ;
267272
268273 int braceLevel = 0 ;
269274 int braceLevelInExpression = 0 ;
275+ char [] bracesInExpression = new char [MAX_PAR_NESTING ];
270276 List <int []> resultParts = new ArrayList <>(values .length );
271277 for (int valueIndex = 0 ; valueIndex < values .length ; valueIndex ++) {
272278 StringPart value = values [valueIndex ];
@@ -276,6 +282,7 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
276282 String text = value .text ;
277283 int len = text .length ();
278284 index = 0 ;
285+ start = 0 ;
279286 while (index < len ) {
280287 char ch = text .charAt (index );
281288 switch (state ) {
@@ -328,8 +335,24 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
328335 case STATE_EXPRESSION :
329336 switch (ch ) {
330337 case '{' :
331-
338+ case '(' :
339+ case '[' :
340+ bracesInExpression [braceLevelInExpression ] = ch ;
332341 braceLevelInExpression ++;
342+ if (braceLevelInExpression >= MAX_PAR_NESTING ) {
343+ raiseInvalidSyntax (node , ERROR_MESSAGE_TOO_MANY_NESTED_PARS );
344+ }
345+ break ;
346+ case ')' :
347+ case ']' :
348+ if (braceLevelInExpression == 0 ) {
349+ raiseInvalidSyntax (node , ERROR_MESSAGE_UNMATCHED_PAR , ch );
350+ }
351+ braceLevelInExpression --;
352+ char expected = ch == ')' ? '(' : '[' ;
353+ if (bracesInExpression [braceLevelInExpression ] != expected ) {
354+ raiseUnmatchingClosingPar (node , bracesInExpression [braceLevelInExpression ], ch );
355+ }
333356 break ;
334357 case '}' :
335358 if (braceLevelInExpression == 0 ) {
@@ -341,6 +364,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
341364 start = index + 1 ;
342365 } else {
343366 braceLevelInExpression --;
367+ if (bracesInExpression [braceLevelInExpression ] != '{' ) {
368+ raiseUnmatchingClosingPar (node , bracesInExpression [braceLevelInExpression ], '}' );
369+ }
344370 }
345371 break ;
346372 case '\'' :
@@ -383,6 +409,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
383409 state = STATE_AFTER_EXCLAMATION ;
384410 break ;
385411 case ':' :
412+ if (braceLevelInExpression > 0 ) {
413+ break ;
414+ }
386415 int [] specifierValue ;
387416 if (start < index ) {
388417 // cases like {3:spec}
@@ -406,7 +435,7 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
406435 braceLevelInSpecifier --;
407436 if (braceLevelInSpecifier == -1 ) {
408437 if (start < index ) {
409- int [][] specifierParts = createTokens (node , new StringPart []{new StringPart (text .substring (start , index ), true )}, false );
438+ int [][] specifierParts = createTokens (node , new StringPart []{new StringPart (text .substring (start , index ), true )});
410439 specifierValue [4 ] = specifierParts .length ;
411440 for (int [] part : specifierParts ) {
412441 part [1 ] = valueIndex ;
@@ -423,6 +452,9 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
423452 index ++;
424453 }
425454 break ;
455+ case '#' :
456+ raiseInvalidSyntax (node , ERROR_MESSAGE_HASH_IN_EXPRESSION );
457+ break ;
426458 case '\n' :
427459 case '\b' :
428460 case '\u0007' :
@@ -482,6 +514,10 @@ protected static int[][] createTokens(FormatStringLiteralNode node, StringPart[]
482514 createExpressionToken (node , values , valueIndex , start , index - 1 );
483515 raiseInvalidSyntax (node , ERROR_MESSAGE_SINGLE_BRACE );
484516 break ;
517+ case STATE_EXPRESSION :
518+ // expression is not allowed to span multiple f-strings: f'{3+' f'1}' is not
519+ // the same as f'{3+1}'
520+ raiseInvalidSyntax (node , ERROR_MESSAGE_EXPECTING_CLOSING_BRACE );
485521 }
486522 }
487523 }
@@ -506,10 +542,18 @@ private static int[] createExpressionToken(FormatStringLiteralNode node, StringP
506542 return new int []{TOKEN_TYPE_EXPRESSION , valueIndex , start , end , 0 };
507543 }
508544
545+ private static void raiseUnmatchingClosingPar (FormatStringLiteralNode node , char opening , char closing ) {
546+ PythonLanguage .getCore ().raiseInvalidSyntax (node , ERROR_MESSAGE_CLOSING_PAR_DOES_NOT_MATCH , closing , opening );
547+ }
548+
509549 private static void raiseInvalidSyntax (FormatStringLiteralNode node , String message ) {
510550 PythonLanguage .getCore ().raiseInvalidSyntax (node , message );
511551 }
512552
553+ private static void raiseInvalidSyntax (FormatStringLiteralNode node , String message , Object ... args ) {
554+ PythonLanguage .getCore ().raiseInvalidSyntax (node , message , args );
555+ }
556+
513557 private static ExpressionNode createExpression (String src , VirtualFrame frame ) {
514558 PythonParser parser = PythonLanguage .getCore ().getParser ();
515559 Source source = Source .newBuilder (PythonLanguage .ID , src , "<fstring>" ).build ();
0 commit comments