diff --git a/esprima/character.py b/esprima/character.py index 929706c..9b1f6d5 100644 --- a/esprima/character.py +++ b/esprima/character.py @@ -82,7 +82,7 @@ del U_CATEGORIES, UNICODE_LETTER, UNICODE_COMBINING_MARK del UNICODE_DIGIT, UNICODE_CONNECTOR_PUNCTUATION -del DECIMAL_CONV, OCTAL_CONV, HEX_CONV +del DECIMAL_CONV class Character: @staticmethod diff --git a/esprima/esprima.py b/esprima/esprima.py index faea0c2..c1fd34a 100644 --- a/esprima/esprima.py +++ b/esprima/esprima.py @@ -49,6 +49,9 @@ def parse(code, options=None, delegate=None, **kwargs): options['jsx'] = True options['classProperties'] = True + # Auto-enable features for ES2024 + options['classProperties'] = True # ES2022: Public class fields + commentHandler = None def proxyDelegate(node, metadata): diff --git a/esprima/nodes.py b/esprima/nodes.py index bbbbdb8..929d8b3 100644 --- a/esprima/nodes.py +++ b/esprima/nodes.py @@ -90,9 +90,9 @@ def __init__(self, params, body, expression): class AsyncFunctionDeclaration(Node): - def __init__(self, id, params, body): + def __init__(self, id, params, body, generator=False): self.type = Syntax.FunctionDeclaration - self.generator = False + self.generator = generator self.expression = False self.isAsync = True self.id = id @@ -101,9 +101,9 @@ def __init__(self, id, params, body): class AsyncFunctionExpression(Node): - def __init__(self, id, params, body): + def __init__(self, id, params, body, generator=False): self.type = Syntax.FunctionExpression - self.generator = False + self.generator = generator self.expression = False self.isAsync = True self.id = id @@ -138,10 +138,11 @@ def __init__(self, label): class CallExpression(Node): - def __init__(self, callee, args): + def __init__(self, callee, args, optional=False): self.type = Syntax.CallExpression self.callee = callee self.arguments = args + self.optional = optional class CatchClause(Node): @@ -151,6 +152,12 @@ def __init__(self, param, body): self.body = body +class ChainExpression(Node): + def __init__(self, expression): + self.type = Syntax.ChainExpression + self.expression = expression + + class ClassBody(Node): def __init__(self, body): self.type = Syntax.ClassBody @@ -174,11 +181,12 @@ def __init__(self, id, superClass, body): class ComputedMemberExpression(Node): - def __init__(self, object, property): + def __init__(self, object, property, optional=False): self.type = Syntax.MemberExpression self.computed = True self.object = object self.property = property + self.optional = optional class ConditionalExpression(Node): @@ -275,6 +283,14 @@ def __init__(self, left, right, body): self.body = body +class ForAwaitStatement(Node): + def __init__(self, left, right, body): + self.type = Syntax.ForAwaitStatement + self.left = left + self.right = right + self.body = body + + class ForStatement(Node): def __init__(self, init, test, update, body): self.type = Syntax.ForStatement @@ -312,6 +328,18 @@ def __init__(self, name): self.name = name +class PrivateIdentifier(Node): + def __init__(self, name): + self.type = Syntax.PrivateIdentifier + self.name = name + + +class StaticBlock(Node): + def __init__(self, body): + self.type = Syntax.StaticBlock + self.body = body + + class IfStatement(Node): def __init__(self, test, consequent, alternate): self.type = Syntax.IfStatement @@ -326,10 +354,14 @@ def __init__(self): class ImportDeclaration(Node): - def __init__(self, specifiers, source): + def __init__(self, specifiers, source, assertions=None, attributes=None): self.type = Syntax.ImportDeclaration self.specifiers = specifiers self.source = source + if assertions is not None: + self.assertions = assertions + if attributes is not None: + self.attributes = attributes class ImportDefaultSpecifier(Node): @@ -472,11 +504,12 @@ def __init__(self, argument): class StaticMemberExpression(Node): - def __init__(self, object, property): + def __init__(self, object, property, optional=False): self.type = Syntax.MemberExpression self.computed = False self.object = object self.property = property + self.optional = optional class Super(Node): diff --git a/esprima/parser.py b/esprima/parser.py index 2639f4c..11ff5ad 100644 --- a/esprima/parser.py +++ b/esprima/parser.py @@ -108,29 +108,30 @@ def __init__(self, code, options={}, delegate=None): self.scanner.trackComment = self.config.comment self.operatorPrecedence = { - '||': 1, - '&&': 2, - '|': 3, - '^': 4, - '&': 5, - '==': 6, - '!=': 6, - '===': 6, - '!==': 6, - '<': 7, - '>': 7, - '<=': 7, - '>=': 7, - 'instanceof': 7, - 'in': 7, - '<<': 8, - '>>': 8, - '>>>': 8, - '+': 9, - '-': 9, - '*': 11, - '/': 11, - '%': 11, + '??': 1, # ES2020: Nullish coalescing + '||': 2, + '&&': 3, + '|': 4, + '^': 5, + '&': 6, + '==': 7, + '!=': 7, + '===': 7, + '!==': 7, + '<': 8, + '>': 8, + '<=': 8, + '>=': 8, + 'instanceof': 8, + 'in': 8, + '<<': 9, + '>>': 9, + '>>>': 9, + '+': 10, + '-': 10, + '*': 12, + '/': 12, + '%': 12, } self.lookahead = RawToken( @@ -204,6 +205,8 @@ def unexpectedTokenError(self, token=None, message=None): msg = Messages.UnexpectedIdentifier elif typ is Token.NumericLiteral: msg = Messages.UnexpectedNumber + elif typ is Token.BigIntLiteral: + msg = Messages.UnexpectedNumber elif typ is Token.StringLiteral: msg = Messages.UnexpectedString elif typ is Token.Template: @@ -457,7 +460,12 @@ def matchAssign(self): return False op = self.lookahead.value - return op in ('=', '*=', '**=', '/=', '%=', '+=', '-=', '<<=', '>>=', '>>>=', '&=', '^=', '|=') + operators = ['=', '*=', '**=', '/=', '%=', '+=', '-=', '<<=', '>>=', '>>>=', '&=', '^=', '|='] + + # ES2021: Logical assignment operators - always enabled + operators.extend(['||=', '&&=', '??=']) + + return op in operators # Cover grammar support. # @@ -548,8 +556,15 @@ def parsePrimaryExpression(self): self.tolerateUnexpectedToken(self.lookahead) expr = self.parseFunctionExpression() if self.matchAsyncFunction() else self.finalize(node, Node.Identifier(self.nextToken().value)) + elif typ is Token.PrivateIdentifier: + # ES2021: Private identifiers are only valid in class contexts + # For now, we'll parse them as private identifier nodes + token = self.nextToken() + expr = self.finalize(node, Node.PrivateIdentifier(token.value)) + elif typ in ( Token.NumericLiteral, + Token.BigIntLiteral, Token.StringLiteral, ): if self.context.strict and self.lookahead.octal: @@ -611,6 +626,8 @@ def parsePrimaryExpression(self): expr = self.finalize(node, Node.ThisExpression()) elif self.matchKeyword('class'): expr = self.parseClassExpression() + elif self.matchImportMeta(): + expr = self.parseImportMeta() elif self.matchImportCall(): expr = self.parseImportCall() else: @@ -706,12 +723,17 @@ def parseObjectPropertyKey(self): if typ in ( Token.StringLiteral, Token.NumericLiteral, + Token.BigIntLiteral, ): if self.context.strict and token.octal: self.tolerateUnexpectedToken(token, Messages.StrictOctalLiteral) raw = self.getTokenRaw(token) key = self.finalize(node, Node.Literal(token.value, raw)) + elif typ is Token.PrivateIdentifier: + # ES2021: Private identifiers + key = self.finalize(node, Node.PrivateIdentifier(token.value)) + elif typ in ( Token.Identifier, Token.BooleanLiteral, @@ -844,7 +866,29 @@ def parseTemplateHead(self): def parseTemplateElement(self): if self.lookahead.type is not Token.Template: - self.throwUnexpectedToken() + # Check if this is the coordination issue: expecting template but got punctuator } + if (self.lookahead.type is Token.Punctuator and + self.lookahead.value == '}' and + self.scanner.curlyStack and + '${' in self.scanner.curlyStack): + # Try to fix the scanner state: ensure the curlyStack top is '${' for template scanning + # Find the last '${' in the stack and bring it to the top + try: + last_template_idx = len(self.scanner.curlyStack) - 1 - self.scanner.curlyStack[::-1].index('${') + # Remove the '${' and put it back on top + template_marker = self.scanner.curlyStack.pop(last_template_idx) + self.scanner.curlyStack.append(template_marker) + + # Reset scanner position to re-scan the } + self.scanner.index -= 1 + # Get a fresh token + self.lookahead = self.scanner.lex() + except (ValueError, IndexError): + # If we can't fix the state, fall back to original error + pass + + if self.lookahead.type is not Token.Template: + self.throwUnexpectedToken(self.lookahead) node = self.createNode() token = self.nextToken() @@ -1015,6 +1059,18 @@ def parseIdentifierName(self): self.throwUnexpectedToken(token) return self.finalize(node, Node.Identifier(token.value)) + def parsePropertyName(self): + """Parse property name which can be an identifier or private identifier""" + node = self.createNode() + token = self.nextToken() + + if token.type is Token.PrivateIdentifier: + return self.finalize(node, Node.PrivateIdentifier(token.value)) + elif self.isIdentifierName(token): + return self.finalize(node, Node.Identifier(token.value)) + else: + self.throwUnexpectedToken(token) + def parseNewExpression(self): node = self.createNode() @@ -1074,11 +1130,33 @@ def matchImportCall(self): return match + def matchImportMeta(self): + match = self.matchKeyword('import') + if match: + state = self.scanner.saveState() + self.scanner.scanComments() + next = self.scanner.lex() + self.scanner.restoreState(state) + match = (next.type is Token.Punctuator) and (next.value == '.') + + return match + def parseImportCall(self): node = self.createNode() self.expectKeyword('import') return self.finalize(node, Node.Import()) + def parseImportMeta(self): + node = self.createNode() + self.expectKeyword('import') + self.expect('.') + # After import., we expect 'meta' + if self.lookahead.type != Token.Identifier or self.lookahead.value != 'meta': + self.throwUnexpectedToken(self.lookahead) + property = self.parseIdentifierName() + meta = Node.Identifier('import') + return self.finalize(node, Node.MetaProperty(meta, property)) + def parseLeftHandSideExpressionAllowCall(self): startToken = self.lookahead maybeAsync = self.matchContextualKeyword('async') @@ -1100,9 +1178,28 @@ def parseLeftHandSideExpressionAllowCall(self): self.context.isBindingElement = False self.context.isAssignmentTarget = True self.expect('.') - property = self.parseIdentifierName() + property = self.parsePropertyName() # Can handle private identifiers expr = self.finalize(self.startNode(startToken), Node.StaticMemberExpression(expr, property)) + elif self.match('?.'): # ES2020 optional chaining - always enabled + self.context.isBindingElement = False + self.context.isAssignmentTarget = False # Optional chaining is not a valid assignment target + self.expect('?.') + if self.match('('): + # Optional call expression: obj?.() + args = self.parseArguments() + expr = self.finalize(self.startNode(startToken), Node.CallExpression(expr, args, optional=True)) + elif self.match('['): + # Optional computed member expression: obj?.[prop] + self.expect('[') + property = self.isolateCoverGrammar(self.parseExpression) + self.expect(']') + expr = self.finalize(self.startNode(startToken), Node.ComputedMemberExpression(expr, property, optional=True)) + else: + # Optional static member expression: obj?.prop + property = self.parsePropertyName() + expr = self.finalize(self.startNode(startToken), Node.StaticMemberExpression(expr, property, optional=True)) + elif self.match('('): asyncArrow = maybeAsync and (startToken.lineNumber == self.lookahead.lineNumber) self.context.isBindingElement = False @@ -1168,9 +1265,24 @@ def parseLeftHandSideExpression(self): self.context.isBindingElement = False self.context.isAssignmentTarget = True self.expect('.') - property = self.parseIdentifierName() + property = self.parsePropertyName() # Can handle private identifiers expr = self.finalize(node, Node.StaticMemberExpression(expr, property)) + elif self.match('?.'): # ES2020 optional chaining - always enabled + self.context.isBindingElement = False + self.context.isAssignmentTarget = False # Optional chaining is not a valid assignment target + self.expect('?.') + if self.match('['): + # Optional computed member expression: obj?.[prop] + self.expect('[') + property = self.isolateCoverGrammar(self.parseExpression) + self.expect(']') + expr = self.finalize(node, Node.ComputedMemberExpression(expr, property, optional=True)) + else: + # Optional static member expression: obj?.prop + property = self.parsePropertyName() + expr = self.finalize(node, Node.StaticMemberExpression(expr, property, optional=True)) + elif self.lookahead.type is Token.Template and self.lookahead.head: quasi = self.parseTemplateLiteral() expr = self.finalize(node, Node.TaggedTemplateExpression(expr, quasi)) @@ -1538,7 +1650,7 @@ def parseStatementListItem(self): self.tolerateUnexpectedToken(self.lookahead, Messages.IllegalExportDeclaration) statement = self.parseExportDeclaration() elif value == 'import': - if self.matchImportCall(): + if self.matchImportCall() or self.matchImportMeta(): statement = self.parseExpressionStatement() else: if not self.context.isModule: @@ -1805,6 +1917,18 @@ def parseVariableStatement(self): return self.finalize(node, Node.VariableDeclaration(declarations, 'var')) + def parseUsingDeclaration(self): + node = self.createNode() + # Consume 'using' as an identifier token + token = self.nextToken() + if token.type != Token.Identifier or token.value != 'using': + self.throwUnexpectedToken(token) + + declarations = self.parseBindingList('using', Params(inFor=False)) + self.consumeSemicolon() + + return self.finalize(node, Node.VariableDeclaration(declarations, 'using')) + # https://tc39.github.io/ecma262/#sec-empty-statement def parseEmptyStatement(self): @@ -1903,9 +2027,16 @@ def parseForStatement(self): forIn = True left = None right = None + isAwait = False node = self.createNode() self.expectKeyword('for') + + # Check for 'await' keyword after 'for' (ES2018) - always enabled + if self.context.allowAwait and self.matchContextualKeyword('await'): + isAwait = True + self.nextToken() + self.expect('(') if self.match(';'): @@ -1921,6 +2052,8 @@ def parseForStatement(self): self.context.allowIn = previousAllowIn if len(declarations) == 1 and self.matchKeyword('in'): + if isAwait: + self.throwError('for-await can only be used with for-of loops, not for-in') decl = declarations[0] if decl.init and (decl.id.type is Syntax.ArrayPattern or decl.id.type is Syntax.ObjectPattern or self.context.strict): self.tolerateError(Messages.ForInOfLoopInitializer, 'for-in') @@ -1956,6 +2089,8 @@ def parseForStatement(self): self.context.allowIn = previousAllowIn if len(declarations) == 1 and declarations[0].init is None and self.matchKeyword('in'): + if isAwait: + self.throwError('for-await can only be used with for-of loops, not for-in') init = self.finalize(init, Node.VariableDeclaration(declarations, kind)) self.nextToken() left = init @@ -1979,6 +2114,8 @@ def parseForStatement(self): self.context.allowIn = previousAllowIn if self.matchKeyword('in'): + if isAwait: + self.throwError('for-await can only be used with for-of loops, not for-in') if not self.context.isAssignmentTarget or init.type is Syntax.AssignmentExpression: self.tolerateError(Messages.InvalidLHSInForIn) @@ -2030,6 +2167,12 @@ def parseForStatement(self): if forIn: return self.finalize(node, Node.ForInStatement(left, right, body)) + # for-await is only valid with for-of loops + if isAwait: + if forIn: + self.throwError('for-await is only valid with for-of loops') + return self.finalize(node, Node.ForAwaitStatement(left, right, body)) + return self.finalize(node, Node.ForOfStatement(left, right, body)) # https://tc39.github.io/ecma262/#sec-continue-statement @@ -2225,24 +2368,33 @@ def parseCatchClause(self): self.expectKeyword('catch') - self.expect('(') - if self.match(')'): - self.throwUnexpectedToken(self.lookahead) - - params = [] - param = self.parsePattern(params) - paramMap = {} - for p in params: - key = '$' + p.value - if key in paramMap: - self.tolerateError(Messages.DuplicateBinding, p.value) - paramMap[key] = True - - if self.context.strict and param.type is Syntax.Identifier: - if self.scanner.isRestrictedWord(param.name): - self.tolerateError(Messages.StrictCatchVariable) - - self.expect(')') + param = None + + # ES2019: Optional catch binding + if self.match('('): + self.expect('(') + if self.match(')'): + # ES2019: catch without binding parameter - always enabled + param = None + else: + params = [] + param = self.parsePattern(params) + paramMap = {} + for p in params: + key = '$' + p.value + if key in paramMap: + self.tolerateError(Messages.DuplicateBinding, p.value) + paramMap[key] = True + + if self.context.strict and param.type is Syntax.Identifier: + if self.scanner.isRestrictedWord(param.name): + self.tolerateError(Messages.StrictCatchVariable) + + self.expect(')') + else: + # ES2019: catch without parentheses at all - always enabled + param = None + body = self.parseBlock() return self.finalize(node, Node.CatchClause(param, body)) @@ -2280,6 +2432,7 @@ def parseStatement(self): Token.BooleanLiteral, Token.NullLiteral, Token.NumericLiteral, + Token.BigIntLiteral, Token.StringLiteral, Token.Template, Token.RegularExpression, @@ -2298,7 +2451,13 @@ def parseStatement(self): statement = self.parseExpressionStatement() elif typ is Token.Identifier: - statement = self.parseFunctionDeclaration() if self.matchAsyncFunction() else self.parseLabelledStatement() + # ES2024: Using declarations - always enabled + if self.lookahead.value == 'using': + statement = self.parseUsingDeclaration() + elif self.matchAsyncFunction(): + statement = self.parseFunctionDeclaration() + else: + statement = self.parseLabelledStatement() elif typ is Token.Keyword: value = self.lookahead.value @@ -2463,6 +2622,10 @@ def parseFunctionDeclaration(self, identifierIsOptional=False): isGenerator = False if isAsync else self.match('*') if isGenerator: self.nextToken() + elif isAsync and self.match('*'): + # Support async generator functions (ES2018) + isGenerator = True + self.nextToken() id = None firstRestricted = None @@ -2508,7 +2671,7 @@ def parseFunctionDeclaration(self, identifierIsOptional=False): self.context.allowYield = previousAllowYield if isAsync: - return self.finalize(node, Node.AsyncFunctionDeclaration(id, params, body)) + return self.finalize(node, Node.AsyncFunctionDeclaration(id, params, body, isGenerator)) return self.finalize(node, Node.FunctionDeclaration(id, params, body, isGenerator)) @@ -2524,6 +2687,10 @@ def parseFunctionExpression(self): isGenerator = False if isAsync else self.match('*') if isGenerator: self.nextToken() + elif isAsync and self.match('*'): + # Support async generator functions (ES2018) + isGenerator = True + self.nextToken() id = None firstRestricted = None @@ -2568,7 +2735,7 @@ def parseFunctionExpression(self): self.context.allowYield = previousAllowYield if isAsync: - return self.finalize(node, Node.AsyncFunctionExpression(id, params, body)) + return self.finalize(node, Node.AsyncFunctionExpression(id, params, body, isGenerator)) return self.finalize(node, Node.FunctionExpression(id, params, body, isGenerator)) @@ -2617,10 +2784,12 @@ def qualifiedPropertyName(self, token): typ = token.type if typ in ( Token.Identifier, + Token.PrivateIdentifier, Token.StringLiteral, Token.BooleanLiteral, Token.NullLiteral, Token.NumericLiteral, + Token.BigIntLiteral, Token.Keyword, ): return True @@ -2726,9 +2895,17 @@ def parseClassElement(self, hasConstructor): computed = self.match('[') key = self.parseObjectPropertyKey() id = key - if id.name == 'static' and (self.qualifiedPropertyName(self.lookahead) or self.match('*')): + if id.name == 'static' and (self.qualifiedPropertyName(self.lookahead) or self.match('*') or self.match('{')): token = self.lookahead isStatic = True + + # ES2022: Static class blocks - always enabled + if self.match('{'): + # This is a static block, not a static property + kind = 'static' + body = self.parseBlock() + return self.finalize(node, Node.StaticBlock(body)) + computed = self.match('[') if self.match('*'): self.nextToken() @@ -2769,6 +2946,18 @@ def parseClassElement(self, hasConstructor): key = self.parseObjectPropertyKey() value = self.parseGeneratorMethod() + elif token.type is Token.PrivateIdentifier: + # ES2021: Private class fields and methods + kind = 'init' # default to field + # key is already parsed by parseObjectPropertyKey + if self.match('='): + self.nextToken() + value = self.parseAssignmentExpression() + elif self.match('('): + # Private method + kind = 'method' + value = self.parsePropertyMethodFunction() + if not kind and key and self.match('('): kind = 'method' value = self.parsePropertyMethodAsyncFunction() if isAsync else self.parsePropertyMethodFunction() @@ -2854,6 +3043,10 @@ def parseModule(self): self.context.strict = True self.context.isModule = True self.scanner.isModule = True + + # ES2022: Enable top-level await in modules - always enabled + self.context.allowAwait = True + node = self.createNode() body = self.parseDirectivePrologues() while self.lookahead.type is not Token.EOF: @@ -2969,9 +3162,67 @@ def parseImportDeclaration(self): self.throwError(message, self.lookahead.value) self.nextToken() src = self.parseModuleSpecifier() + + # Parse import assertions (ES2022) or attributes (ES2023) + assertions = None + attributes = None + + if self.matchContextualKeyword('assert'): + # ES2022 import assertions: import ... assert { type: "json" } + self.nextToken() + assertions = self.parseImportAssertions() + elif self.matchKeyword('with') or self.matchContextualKeyword('with'): + # ES2023 import attributes: import ... with { type: "json" } + self.nextToken() + attributes = self.parseImportAttributes() + self.consumeSemicolon() - return self.finalize(node, Node.ImportDeclaration(specifiers, src)) + return self.finalize(node, Node.ImportDeclaration(specifiers, src, assertions, attributes)) + + def parseImportAssertions(self): + # Parse { type: "json", ... } + return self.parseImportAttributeList() + + def parseImportAttributes(self): + # Parse { type: "json", ... } + return self.parseImportAttributeList() + + def parseImportAttributeList(self): + # Parse { key: "value", key2: "value2" } + attributes = [] + + self.expect('{') + + while not self.match('}'): + if len(attributes) > 0: + self.expect(',') + if self.match('}'): + break + + # Parse key + if self.lookahead.type == Token.Identifier: + key = self.parseIdentifierName() + elif self.lookahead.type == Token.StringLiteral: + key = self.parsePrimaryExpression() + else: + self.throwUnexpectedToken(self.lookahead) + + self.expect(':') + + # Parse value (must be a string literal) + if self.lookahead.type != Token.StringLiteral: + self.throwError('Import assertion/attribute value must be a string literal') + value = self.parsePrimaryExpression() + + # Create an object property-like structure + attr = Object() + attr.key = key + attr.value = value + attributes.append(attr) + + self.expect('}') + return attributes # https://tc39.github.io/ecma262/#sec-exports diff --git a/esprima/scanner.py b/esprima/scanner.py index 53502a5..b52c001 100644 --- a/esprima/scanner.py +++ b/esprima/scanner.py @@ -24,6 +24,7 @@ from __future__ import absolute_import, unicode_literals import re +import warnings from .objects import Object from .compat import xrange, unicode, uchr, uord @@ -31,6 +32,7 @@ from .messages import Messages from .token import Token +warnings.simplefilter(action='ignore', category=FutureWarning) def hexValue(ch): return HEX_CONV[ch] @@ -69,7 +71,7 @@ def __init__(self, multiLine=None, slice=None, range=None, loc=None): class RawToken(Object): - def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, octal=None, cooked=None, head=None, tail=None, lineNumber=None, lineStart=None, start=None, end=None): + def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, octal=None, cooked=None, head=None, tail=None, lineNumber=None, lineStart=None, start=None, end=None, raw=None): self.type = type self.value = value self.pattern = pattern @@ -83,6 +85,7 @@ def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, self.lineStart = lineStart self.start = start self.end = end + self.raw = raw class ScannerState(Object): @@ -535,6 +538,54 @@ def scanIdentifier(self): end=self.index ) + def scanPrivateIdentifier(self): + start = self.index + + # Consume the '#' + self.index += 1 + + # The next character must start an identifier + if self.eof() or not Character.isIdentifierStart(self.source[self.index]): + self.throwUnexpectedToken() + + # Scan the identifier part + id = self.getIdentifier() + + return RawToken( + type=Token.PrivateIdentifier, + value='#' + id, + lineNumber=self.lineNumber, + lineStart=self.lineStart, + start=start, + end=self.index + ) + + def scanHashbang(self): + start = self.index + + # Consume '#!' + self.index += 2 + + # Consume the rest of the line + while not self.eof(): + ch = self.source[self.index] + if Character.isLineTerminator(ch): + break + self.index += 1 + + # Hashbangs are treated as comments and skipped + # Advance past the line terminator if present + if not self.eof() and Character.isLineTerminator(self.source[self.index]): + if self.source[self.index] == '\r' and self.source[self.index + 1] == '\n': + self.index += 2 + else: + self.index += 1 + self.lineNumber += 1 + self.lineStart = self.index + + # Return the next actual token + return self.lex() + # https://tc39.github.io/ecma262/#sec-punctuators def scanPunctuator(self): @@ -570,11 +621,34 @@ def scanPunctuator(self): '[', ']', ':', - '?', '~', ): self.index += 1 + elif str == '?': + # Check for nullish coalescing assignment operator (??=) - ES2021 + if (self.index + 2 < self.length and + self.source[self.index + 1:self.index + 3] == '?='): # ES2021 nullish assignment - always enabled + self.index += 3 + str = '??=' + # Check for nullish coalescing operator (??) - ES2020, always enabled + elif (self.index + 1 < self.length and + self.source[self.index + 1] == '?'): + self.index += 2 + str = '??' + # Check for optional chaining operator (?.) - ES2020, always enabled + elif (self.index + 1 < self.length and + self.source[self.index + 1] == '.'): + # Only if not followed by a digit (to avoid confusion with ?.123) + if (self.index + 2 >= self.length or + not Character.isDecimalDigit(self.source[self.index + 2])): + self.index += 2 + str = '?.' + else: + self.index += 1 + else: + self.index += 1 + else: # 4-character punctuator. str = self.source[self.index:self.index + 4] @@ -586,7 +660,7 @@ def scanPunctuator(self): str = str[:3] if str in ( '===', '!==', '>>>', - '<<=', '>>=', '**=' + '<<=', '>>=', '**=', ): self.index += 3 else: @@ -594,10 +668,10 @@ def scanPunctuator(self): # 2-character punctuators. str = str[:2] if str in ( - '&&', '||', '==', '!=', + '==', '!=', '+=', '-=', '*=', '/=', '++', '--', '<<', '>>', - '&=', '|=', '^=', '%=', + '^=', '%=', '<=', '>=', '=>', '**', ): self.index += 2 @@ -605,8 +679,34 @@ def scanPunctuator(self): # 1-character punctuators. str = self.source[self.index] - if str in '<>=!+-*%&|^/': + if str in '<>=!+-*%^/': self.index += 1 + elif str == '&': + # Check for logical assignment &&= (ES2021), always enabled + if self.source[self.index + 1:self.index + 3] == '&=': + str = '&&=' + self.index += 3 + elif self.source[self.index + 1] == '&': + str = '&&' + self.index += 2 + elif self.source[self.index + 1] == '=': + str = '&=' + self.index += 2 + else: + self.index += 1 + elif str == '|': + # Check for logical assignment ||= (ES2021), always enabled + if self.source[self.index + 1:self.index + 3] == '|=': + str = '||=' + self.index += 3 + elif self.source[self.index + 1] == '|': + str = '||' + self.index += 2 + elif self.source[self.index + 1] == '=': + str = '|=' + self.index += 2 + else: + self.index += 1 if self.index == start: self.throwUnexpectedToken() @@ -751,18 +851,34 @@ def scanNumericLiteral(self): if self.isImplicitOctalLiteral(): return self.scanOctalLiteral(ch, start) - while Character.isDecimalDigit(self.source[self.index]): - num += self.source[self.index] - self.index += 1 + while Character.isDecimalDigit(self.source[self.index]) or self.source[self.index] == '_': # ES2021 numeric separators always enabled + ch = self.source[self.index] + if ch == '_': + # ES2021: Numeric separator - validate placement + next_char = self.source[self.index + 1] if self.index + 1 < self.length else '' + if not Character.isDecimalDigit(next_char): + self.throwUnexpectedToken() + self.index += 1 # Skip separator + else: + num += ch + self.index += 1 ch = self.source[self.index] if ch == '.': num += self.source[self.index] self.index += 1 - while Character.isDecimalDigit(self.source[self.index]): - num += self.source[self.index] - self.index += 1 + while Character.isDecimalDigit(self.source[self.index]) or self.source[self.index] == '_': # ES2021 numeric separators always enabled + ch = self.source[self.index] + if ch == '_': + # ES2021: Numeric separator in fractional part + next_char = self.source[self.index + 1] if self.index + 1 < self.length else '' + if not Character.isDecimalDigit(next_char): + self.throwUnexpectedToken() + self.index += 1 # Skip separator + else: + num += ch + self.index += 1 ch = self.source[self.index] @@ -776,13 +892,45 @@ def scanNumericLiteral(self): self.index += 1 if Character.isDecimalDigit(self.source[self.index]): - while Character.isDecimalDigit(self.source[self.index]): - num += self.source[self.index] - self.index += 1 + while Character.isDecimalDigit(self.source[self.index]) or self.source[self.index] == '_': # ES2021 numeric separators always enabled + ch = self.source[self.index] + if ch == '_': + # ES2021: Numeric separator in exponent + next_char = self.source[self.index + 1] if self.index + 1 < self.length else '' + if not Character.isDecimalDigit(next_char): + self.throwUnexpectedToken() + self.index += 1 # Skip separator + else: + num += ch + self.index += 1 else: self.throwUnexpectedToken() + # Check for BigInt literal (ES2020) + if self.source[self.index] == 'n': + # BigInt literals cannot have decimals or exponents + if '.' in num or 'e' in num.lower(): + self.throwUnexpectedToken() + + # ES2020+ BigInt support - always enabled + self.index += 1 # consume 'n' + # BigInt value: convert string to int for storage + try: + bigint_value = int(num) + except ValueError: + self.throwUnexpectedToken() + + return RawToken( + type=Token.BigIntLiteral, + value=bigint_value, + raw=num + 'n', + lineNumber=self.lineNumber, + lineStart=self.lineStart, + start=start, + end=self.index + ) + if Character.isIdentifierStart(self.source[self.index]): self.throwUnexpectedToken() @@ -1151,6 +1299,10 @@ def lex(self): ch = self.source[self.index] + # ES2023: Hashbang grammar - only at the very beginning of source, always enabled + if self.index == 0 and ch == '#' and self.index + 1 < self.length and self.source[self.index + 1] == '!': + return self.scanHashbang() + if Character.isIdentifierStart(ch): return self.scanIdentifier() @@ -1178,6 +1330,10 @@ def lex(self): if ch == '`' or (ch == '}' and self.curlyStack and self.curlyStack[-1] == '${'): return self.scanTemplate() + # ES2021: Private identifiers start with # - always enabled + if ch == '#': + return self.scanPrivateIdentifier() + # Possible identifier start in a surrogate pair. cp = ord(ch) if cp >= 0xD800 and cp < 0xDFFF: diff --git a/esprima/syntax.py b/esprima/syntax.py index 001b641..fda34a7 100644 --- a/esprima/syntax.py +++ b/esprima/syntax.py @@ -36,6 +36,7 @@ class Syntax: BreakStatement = "BreakStatement" CallExpression = "CallExpression" CatchClause = "CatchClause" + ChainExpression = "ChainExpression" ClassBody = "ClassBody" ClassDeclaration = "ClassDeclaration" ClassExpression = "ClassExpression" @@ -53,6 +54,7 @@ class Syntax: ForStatement = "ForStatement" ForOfStatement = "ForOfStatement" ForInStatement = "ForInStatement" + ForAwaitStatement = "ForAwaitStatement" FunctionDeclaration = "FunctionDeclaration" FunctionExpression = "FunctionExpression" Identifier = "Identifier" @@ -69,6 +71,8 @@ class Syntax: MetaProperty = "MetaProperty" MethodDefinition = "MethodDefinition" FieldDefinition = "FieldDefinition" + PrivateIdentifier = "PrivateIdentifier" + StaticBlock = "StaticBlock" NewExpression = "NewExpression" ObjectExpression = "ObjectExpression" ObjectPattern = "ObjectPattern" diff --git a/esprima/token.py b/esprima/token.py index 846ced6..fb3621d 100644 --- a/esprima/token.py +++ b/esprima/token.py @@ -35,6 +35,8 @@ class Token: StringLiteral = 8 RegularExpression = 9 Template = 10 + BigIntLiteral = 11 + PrivateIdentifier = 12 TokenName = {} @@ -48,3 +50,5 @@ class Token: TokenName[Token.StringLiteral] = "String" TokenName[Token.RegularExpression] = "RegularExpression" TokenName[Token.Template] = "Template" +TokenName[Token.BigIntLiteral] = "BigInt" +TokenName[Token.PrivateIdentifier] = "PrivateIdentifier" diff --git a/esprima/visitor.py b/esprima/visitor.py index ad32375..429fb20 100644 --- a/esprima/visitor.py +++ b/esprima/visitor.py @@ -279,6 +279,9 @@ def visit_dict(self, obj): items = [] for k, item in obj.items(): if item is not None and not k.startswith('_'): + # Skip 'optional' field when it's False (default value) + if k == 'optional' and item is False: + continue v = yield item k = unicode(k) items.append((self.map.get(k, k), v)) @@ -286,3 +289,6 @@ def visit_dict(self, obj): def visit_SRE_Pattern(self, obj): yield Visited({}) + + def visit_Pattern(self, obj): + yield Visited({})