From: Gabriel Ivăncescu gabrielopcode@gmail.com
ES5 keywords (let, get, set) are allowed as identifiers almost everywhere. Normally, it is ambiguous when declaring a label (LabelledStatement) using an ES5 keyword, so to prevent further Bison merge conflicts, we check if the keyword is actually followed by a colon, and we don't treat it as a colon if it is. Bison has only one lookahead token that can be used to decide a shift/reduce so it results in a conflict, as the parser can't immediately decide whether to shift the : (following the label and thus treating it as a label) or to reduce Identifier (potentially treating it as the start of another construct).
This also solves other cases of such keywords followed by colons (for instance, in property defs), but we have to remove it from the reserved list in such case, else it creates an ambiguity with the get/set keywords in property definitions (the *NoES5 variants), and then the new tests fail to parse.
Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com --- dlls/jscript/lex.c | 97 +++++++++++++++++++++++++--------------- dlls/jscript/parser.h | 1 + dlls/jscript/parser.y | 50 ++++++++++++++------- dlls/mshtml/tests/es5.js | 24 +++++++++- 4 files changed, 118 insertions(+), 54 deletions(-)
diff --git a/dlls/jscript/lex.c b/dlls/jscript/lex.c index ee29bff1e37..3cfc4d1acf6 100644 --- a/dlls/jscript/lex.c +++ b/dlls/jscript/lex.c @@ -131,34 +131,6 @@ int hex_to_int(WCHAR c) return -1; }
-static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval) -{ - int min = 0, max = ARRAY_SIZE(keywords)-1, r, i; - - while(min <= max) { - i = (min+max)/2; - - r = check_keyword(ctx, keywords[i].word, lval); - if(!r) { - if(ctx->script->version < keywords[i].min_version) { - TRACE("ignoring keyword %s in incompatible mode\n", - debugstr_w(keywords[i].word)); - ctx->ptr -= lstrlenW(keywords[i].word); - return 0; - } - ctx->implicit_nl_semicolon = keywords[i].no_nl; - return keywords[i].token; - } - - if(r > 0) - min = i+1; - else - max = i-1; - } - - return 0; -} - static BOOL skip_html_comment(parser_ctx_t *ctx) { if(!ctx->is_html || ctx->ptr+3 >= ctx->end || @@ -224,6 +196,55 @@ static BOOL skip_spaces(parser_ctx_t *ctx) return ctx->ptr != ctx->end; }
+static BOOL skip_insignificant_space(parser_ctx_t *ctx) +{ + do { + if(!skip_spaces(ctx)) + return FALSE; + } while(skip_comment(ctx) || skip_html_comment(ctx)); + return TRUE; +} + +static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval) +{ + int min = 0, max = ARRAY_SIZE(keywords)-1, r, i; + + while(min <= max) { + i = (min+max)/2; + + r = check_keyword(ctx, keywords[i].word, lval); + if(!r) { + if(ctx->script->version < keywords[i].min_version) { + TRACE("ignoring keyword %s in incompatible mode\n", + debugstr_w(keywords[i].word)); + ctx->ptr -= wcslen(keywords[i].word); + return 0; + } + /* ES5+ keywords are treated as identifiers if followed by a colon (label, prop, etc) */ + if(keywords[i].min_version >= SCRIPTLANGUAGEVERSION_ES5) { + const WCHAR *old = ctx->ptr; + BOOL at_end = !skip_insignificant_space(ctx); + + ctx->next = ctx->ptr; + ctx->ptr = old; + if(!at_end && ctx->next[0] == ':' && &ctx->next[1] != ctx->end && ctx->next[1] != ':') { + ctx->ptr -= wcslen(keywords[i].word); + return 0; + } + } + ctx->implicit_nl_semicolon = keywords[i].no_nl; + return keywords[i].token; + } + + if(r > 0) + min = i+1; + else + max = i-1; + } + + return 0; +} + BOOL unescape(WCHAR *str, size_t *len) { WCHAR *pd, *p, c, *end = str + *len; @@ -545,13 +566,19 @@ static BOOL parse_numeric_literal(parser_ctx_t *ctx, double *ret)
static int next_token(parser_ctx_t *ctx, unsigned *loc, void *lval) { - do { - if(!skip_spaces(ctx)) { - *loc = ctx->ptr - ctx->begin; - return 0; - } - }while(skip_comment(ctx) || skip_html_comment(ctx)); - *loc = ctx->ptr - ctx->begin; + BOOL at_end; + + if(ctx->next) { + ctx->ptr = ctx->next; + ctx->next = NULL; + at_end = (ctx->ptr == ctx->end); + }else { + at_end = !skip_insignificant_space(ctx); + } + + *loc = ctx->ptr - ctx->begin; + if(at_end) + return 0;
if(ctx->implicit_nl_semicolon) { if(ctx->nl) diff --git a/dlls/jscript/parser.h b/dlls/jscript/parser.h index 406bedc6607..ea205593404 100644 --- a/dlls/jscript/parser.h +++ b/dlls/jscript/parser.h @@ -33,6 +33,7 @@ typedef struct _parser_ctx_t { const WCHAR *begin; const WCHAR *end; const WCHAR *ptr; + const WCHAR *next;
script_ctx_t *script; struct _compiler_ctx_t *compiler; diff --git a/dlls/jscript/parser.y b/dlls/jscript/parser.y index 42d695e0846..5328df6afe6 100644 --- a/dlls/jscript/parser.y +++ b/dlls/jscript/parser.y @@ -221,7 +221,7 @@ static expression_t *new_prop_and_value_expression(parser_ctx_t*,property_list_t %type <expr> MemberExpression %type <expr> PrimaryExpression %type <expr> GetterSetterMethod -%type <identifier> Identifier_opt +%type <identifier> Identifier Identifier_opt %type <variable_list> VariableDeclarationList %type <variable_list> VariableDeclarationListNoIn %type <variable_declaration> VariableDeclaration @@ -238,13 +238,14 @@ static expression_t *new_prop_and_value_expression(parser_ctx_t*,property_list_t %type <element_list> ElementList %type <property_list> PropertyNameAndValueList %type <property_definition> PropertyDefinition -%type <literal> PropertyName +%type <literal> PropertyName PropertyNameNoES5 %type <literal> BooleanLiteral %type <ival> AssignOper -%type <identifier> IdentifierName ReservedAsIdentifier +%type <identifier> IdentifierName IdentifierNameNoES5 ReservedAsIdentifier ES5Keyword
%nonassoc LOWER_THAN_ELSE -%nonassoc kELSE +%nonassoc kELSE kIN kINSTANCEOF ':' +%nonassoc kGET kLET kSET
%%
@@ -268,9 +269,9 @@ FunctionStatementList FunctionExpression : kFUNCTION left_bracket FormalParameterList_opt right_bracket '{' FunctionBody '}' { $$ = new_function_expression(ctx, NULL, $3, $6, NULL, ctx->begin + @1, @7 - @1 + 1); } - | kFUNCTION tIdentifier left_bracket FormalParameterList_opt right_bracket '{' FunctionBody '}' + | kFUNCTION Identifier left_bracket FormalParameterList_opt right_bracket '{' FunctionBody '}' { $$ = new_function_expression(ctx, $2, $4, $7, NULL, ctx->begin + @1, @8 - @1 + 1); } - | kFUNCTION tIdentifier kDCOL tIdentifier left_bracket FormalParameterList_opt right_bracket '{' FunctionBody '}' + | kFUNCTION tIdentifier kDCOL Identifier left_bracket FormalParameterList_opt right_bracket '{' FunctionBody '}' { $$ = new_function_expression(ctx, $4, $6, $9, $2, ctx->begin + @1, @10 - @1 + 1); }
/* ECMA-262 10th Edition 14.1 */ @@ -279,8 +280,8 @@ FunctionBody
/* ECMA-262 3rd Edition 13 */ FormalParameterList - : tIdentifier { $$ = new_parameter_list(ctx, $1); } - | FormalParameterList ',' tIdentifier + : Identifier { $$ = new_parameter_list(ctx, $1); } + | FormalParameterList ',' Identifier { $$ = parameter_list_add(ctx, $1, $3); }
/* ECMA-262 3rd Edition 13 */ @@ -381,12 +382,12 @@ VariableDeclarationListNoIn
/* ECMA-262 3rd Edition 12.2 */ VariableDeclaration - : tIdentifier Initialiser_opt + : Identifier Initialiser_opt { $$ = new_variable_declaration(ctx, $1, $2); }
/* ECMA-262 3rd Edition 12.2 */ VariableDeclarationNoIn - : tIdentifier InitialiserNoIn_opt + : Identifier InitialiserNoIn_opt { $$ = new_variable_declaration(ctx, $1, $2); }
/* ECMA-262 3rd Edition 12.2 */ @@ -527,7 +528,7 @@ TryStatement
/* ECMA-262 3rd Edition 12.14 */ Catch - : kCATCH left_bracket tIdentifier right_bracket Block + : kCATCH left_bracket Identifier right_bracket Block { $$ = new_catch_block(ctx, $3, $5); }
/* ECMA-262 3rd Edition 12.14 */ @@ -786,7 +787,7 @@ ArgumentList /* ECMA-262 3rd Edition 11.1 */ PrimaryExpression : kTHIS { $$ = new_expression(ctx, EXPR_THIS, 0); } - | tIdentifier { $$ = new_identifier_expression(ctx, $1); } + | Identifier { $$ = new_identifier_expression(ctx, $1); } | Literal { $$ = new_literal_expression(ctx, $1); } | ArrayLiteral { $$ = $1; } | ObjectLiteral { $$ = $1; } @@ -840,7 +841,7 @@ PropertyNameAndValueList
/* ECMA-262 5.1 Edition 12.2.6 */ PropertyDefinition - : PropertyName ':' AssignmentExpression + : PropertyNameNoES5 ':' AssignmentExpression { $$ = new_property_definition(ctx, PROPERTY_DEFINITION_VALUE, $1, $3); } | kGET PropertyName GetterSetterMethod { $$ = new_property_definition(ctx, PROPERTY_DEFINITION_GETTER, $2, $3); } @@ -857,13 +858,26 @@ PropertyName | tStringLiteral { $$ = new_string_literal(ctx, $1); } | tNumericLiteral { $$ = $1; }
+PropertyNameNoES5 + : IdentifierNameNoES5 { $$ = new_string_literal(ctx, compiler_alloc_string_len(ctx->compiler, $1, lstrlenW($1))); } + | tStringLiteral { $$ = new_string_literal(ctx, $1); } + | tNumericLiteral { $$ = $1; } + /* ECMA-262 3rd Edition 7.6 */ Identifier_opt : /* empty*/ { $$ = NULL; } - | tIdentifier { $$ = $1; } + | Identifier { $$ = $1; } + +Identifier + : tIdentifier { $$ = $1; } + | ES5Keyword { $$ = $1; }
/* ECMA-262 5.1 Edition 7.6 */ IdentifierName + : IdentifierNameNoES5 { $$ = $1; } + | ES5Keyword { $$ = $1; } + +IdentifierNameNoES5 : tIdentifier { $$ = $1; } | ReservedAsIdentifier { @@ -890,15 +904,12 @@ ReservedAsIdentifier | kFINALLY { $$ = $1; } | kFOR { $$ = $1; } | kFUNCTION { $$ = $1; } - | kGET { $$ = $1; } | kIF { $$ = $1; } | kIN { $$ = $1; } | kINSTANCEOF { $$ = $1; } - | kLET { $$ = $1; } | kNEW { $$ = $1; } | kNULL { $$ = $1; } | kRETURN { $$ = $1; } - | kSET { $$ = $1; } | kSWITCH { $$ = $1; } | kTHIS { $$ = $1; } | kTHROW { $$ = $1; } @@ -910,6 +921,11 @@ ReservedAsIdentifier | kWHILE { $$ = $1; } | kWITH { $$ = $1; }
+ES5Keyword + : kGET { $$ = $1; } + | kLET { $$ = $1; } + | kSET { $$ = $1; } + /* ECMA-262 3rd Edition 7.8 */ Literal : kNULL { $$ = new_null_literal(ctx); } diff --git a/dlls/mshtml/tests/es5.js b/dlls/mshtml/tests/es5.js index 18b106e2a82..36dfacad139 100644 --- a/dlls/mshtml/tests/es5.js +++ b/dlls/mshtml/tests/es5.js @@ -473,7 +473,14 @@ sync_test("array_sort", function() { });
sync_test("identifier_keywords", function() { + function get(let, set) { { get instanceof (Object); } return let + set; } + set: var let = get(1, 2); + { get /* asdf */: 10 } + var set = 0; var o = { + get: get, + set: set, + let /* comment*/ : let, if: 1, default: 2, function: 3, @@ -486,8 +493,8 @@ sync_test("identifier_keywords", function() { else: true, finally: true, for: true, - in: true, - instanceof: true, + set in(x) { }, + get instanceof() { return 3; }, new: true, return: true, switch: true, @@ -508,6 +515,19 @@ sync_test("identifier_keywords", function() { ok(o.if === 1, "o.if = " + o.if); ok(ro().default === 2, "ro().default = " + ro().default); ok(o.false === true, "o.false = " + o.false); + ok(o.get === get, "o.get = " + o.get); + ok(o.set === set, "o.set = " + o.set); + ok(o.let === let, "o.let = " + o.let); + ok(o.instanceof === 3, "o.instanceof = " + o.instanceof); + + var tmp = false; + try { + eval('function var() { }'); + } + catch(set) { + tmp = true; + } + ok(tmp === true, "Expected exception for 'function var() { }'"); });
function test_own_data_prop_desc(obj, prop, expected_writable, expected_enumerable,
It seems that most of `tIdentifier` -> `Identifier` changes should not depend on lexer changes nor *NoES5. It would be nice to split them.
Unless I'm missing something, `ProprtyDefinition` could have explicit rules like `kGET ':' AssignmentExpression` instead of depending on a lexer hack. Is there anything other than labelled statements that need the lexer hack? Did you see actual code depending that?
On Tue Oct 8 20:31:11 2024 +0000, Jacek Caban wrote:
It seems that most of `tIdentifier` -> `Identifier` changes should not depend on lexer changes nor *NoES5. It would be nice to split them. Unless I'm missing something, `ProprtyDefinition` could have explicit rules like `kGET ':' AssignmentExpression` instead of depending on a lexer hack. Is there anything other than labelled statements that need the lexer hack? Did you see actual code depending that?
Yeah, it's used by some of the common js libs/frameworks/whatever they are called, although not 100% sure if it was for labels as well. The problem is that it generates conflicts, and last time I tried to upstream this (years back, until I revisited it now), I was told not to increase bison conflicts. But I added new tests now and those conflicts were wrong anyway since they were failing to parse.
Thanks for the suggestion, I'll try to duplicate the rules, maybe I can get rid of (some of?) the hacks.
Yeah, it's used by some of the common js libs/frameworks/whatever they are called, although not 100% sure if it was for labels as well.
FWIW, I asked specifically about labelled statements because they seem the least likely to matter and the most problematic for the parser at the same time.