[PATCH v6 0/6] MR10244: Draft: vbscript: fix sub first arg parentheses handling
Closes https://bugs.winehq.org/show_bug.cgi?id=54177 -- v6: improve parser comment https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/lex.c | 34 ++++++++++++++++++++++++++++++++-- dlls/vbscript/tests/lang.vbs | 8 ++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/dlls/vbscript/lex.c b/dlls/vbscript/lex.c index 8c5c69ea429..a7e8330bbb8 100644 --- a/dlls/vbscript/lex.c +++ b/dlls/vbscript/lex.c @@ -450,7 +450,8 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) return comment_line(ctx); ctx->ptr++; return '-'; - case '(': + case '(': { + const WCHAR *paren_pos = ctx->ptr; /* NOTE: * We resolve empty brackets in lexer instead of parser to avoid complex conflicts * in call statement special case |f()| without 'call' keyword @@ -464,10 +465,39 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) /* * Parser can't predict if bracket is part of argument expression or an argument * in call expression. We predict it here instead. + * + * In VBScript, 'f(x)' treats parens as call argument parens, but in a statement + * like 'f (x) * 8, y' the parens are expression grouping. We detect this by + * checking for whitespace before '(' and then looking past the matching ')' for + * an operator that cannot start an expression (*, /, \, ^). Such an operator + * after ')' means the '(' must be expression grouping. */ - if(ctx->last_token == tIdentifier || ctx->last_token == ')') + if(ctx->last_token == tIdentifier || ctx->last_token == ')') { + if(paren_pos > ctx->code && (paren_pos[-1] == ' ' || paren_pos[-1] == '\t')) { + const WCHAR *p = ctx->ptr; + int depth = 1; + + while(p < ctx->end && depth > 0) { + if(*p == '(') + depth++; + else if(*p == ')') + depth--; + if(depth > 0) + p++; + } + + if(depth == 0) { + p++; + while(p < ctx->end && (*p == ' ' || *p == '\t')) + p++; + if(p < ctx->end && (*p == '*' || *p == '/' || *p == '\\' || *p == '^')) + return tEXPRLBRACKET; + } + } return '('; + } return tEXPRLBRACKET; + } case '"': return parse_string_literal(ctx, lval); case '#': diff --git a/dlls/vbscript/tests/lang.vbs b/dlls/vbscript/tests/lang.vbs index 0025bfeddcf..89804bdbd97 100644 --- a/dlls/vbscript/tests/lang.vbs +++ b/dlls/vbscript/tests/lang.vbs @@ -983,6 +983,14 @@ Call TestSubExit2 TestSubMultiArgs 1, 2, 3, 4, 5 Call TestSubMultiArgs(1, 2, 3, 4, 5) +Sub TestSubParenExpr(a, b) + Call ok(a=16, "a = " & a) + Call ok(b=7, "b = " & b) +End Sub + +TestSubParenExpr (2) * 8, 7 +TestSubParenExpr 8 * (2), 7 + Sub TestSubLocalVal x = false Call ok(not x, "local x is not false?") -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/lex.c | 44 ++++++++++++++++++++++++----- dlls/vbscript/parse.h | 1 + dlls/vbscript/parser.y | 13 +++++++-- dlls/vbscript/tests/lang.vbs | 55 ++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 10 deletions(-) diff --git a/dlls/vbscript/lex.c b/dlls/vbscript/lex.c index a7e8330bbb8..ef69d31cd53 100644 --- a/dlls/vbscript/lex.c +++ b/dlls/vbscript/lex.c @@ -466,14 +466,13 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) * Parser can't predict if bracket is part of argument expression or an argument * in call expression. We predict it here instead. * - * In VBScript, 'f(x)' treats parens as call argument parens, but in a statement - * like 'f (x) * 8, y' the parens are expression grouping. We detect this by - * checking for whitespace before '(' and then looking past the matching ')' for - * an operator that cannot start an expression (*, /, \, ^). Such an operator - * after ')' means the '(' must be expression grouping. + * In statement context, 'f (x) * 8, y' treats (x) as expression grouping, + * not call parens. Detect this when: identifier/')' precedes '(' with a space, + * and a binary-only operator (*, /, \, ^, &) follows the matching ')'. */ if(ctx->last_token == tIdentifier || ctx->last_token == ')') { - if(paren_pos > ctx->code && (paren_pos[-1] == ' ' || paren_pos[-1] == '\t')) { + if(paren_pos > ctx->code && (paren_pos[-1] == ' ' || paren_pos[-1] == '\t') + && ctx->is_statement_ctx) { const WCHAR *p = ctx->ptr; int depth = 1; @@ -490,7 +489,7 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) p++; while(p < ctx->end && (*p == ' ' || *p == '\t')) p++; - if(p < ctx->end && (*p == '*' || *p == '/' || *p == '\\' || *p == '^')) + if(p < ctx->end && (*p == '*' || *p == '/' || *p == '\\' || *p == '^' || *p == '&')) return tEXPRLBRACKET; } } @@ -576,5 +575,36 @@ int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx) ctx->last_nl = ctx->ptr-ctx->code; } + /* Update is_statement_ctx: TRUE at the start of a statement, persists + * through dotted member chains (e.g. 'obj.prop.method'), FALSE + * once we see an operator or other expression token. + * An identifier keeps the state only if it follows a statement-start + * token or a dot (member chain); otherwise it's an argument. */ + switch(ret) { + case tNL: + case ':': + case tTHEN: + case tELSE: + case tELSEIF: + ctx->is_statement_ctx = TRUE; + break; + case tDOT: + case '.': + /* Dots in member chains keep current state. */ + break; + case tIdentifier: + /* First identifier after statement start or after a dot keeps TRUE. + * An identifier after another identifier (e.g. 'Sub Arg') means + * we're in argument context, so set to FALSE. */ + if(ctx->last_token != '.' && ctx->last_token != tDOT + && ctx->last_token != tNL && ctx->last_token != ':' + && ctx->last_token != tTHEN && ctx->last_token != tELSE + && ctx->last_token != tELSEIF) + ctx->is_statement_ctx = FALSE; + break; + default: + ctx->is_statement_ctx = FALSE; + break; + } return (ctx->last_token = ret); } diff --git a/dlls/vbscript/parse.h b/dlls/vbscript/parse.h index 64c3dee2a69..df9243a3c13 100644 --- a/dlls/vbscript/parse.h +++ b/dlls/vbscript/parse.h @@ -301,6 +301,7 @@ typedef struct { LCID lcid; int last_token; + BOOL is_statement_ctx; unsigned last_nl; statement_t *stats; diff --git a/dlls/vbscript/parser.y b/dlls/vbscript/parser.y index 20c30eed681..cdf5f5901bb 100644 --- a/dlls/vbscript/parser.y +++ b/dlls/vbscript/parser.y @@ -756,9 +756,15 @@ static call_expression_t *make_call_expression(parser_ctx_t *ctx, expression_t * return call_expr; if(arguments->type != EXPR_NOARG) { - FIXME("Invalid syntax: missing comma\n"); - ctx->hres = E_FAIL; - return NULL; + /* Handle 'f (x) + expr, ...' in statement context: the parser saw f(x) as + * a call with arg x, and '+ expr' as a unary-plus expression. Windows VBScript + * treats this as f called with (x) + expr as first arg, followed by remaining args. */ + expression_t *remaining = arguments->next; + call_expr->args = new_binary_expression(ctx, EXPR_ADD, call_expr->args, arguments); + if(!call_expr->args) + return NULL; + call_expr->args->next = remaining; + return call_expr; } call_expr->args->next = arguments->next; @@ -1225,6 +1231,7 @@ HRESULT parse_script(parser_ctx_t *ctx, const WCHAR *code, const WCHAR *delimite ctx->hres = S_OK; ctx->error_loc = -1; ctx->last_token = tNL; + ctx->is_statement_ctx = TRUE; ctx->last_nl = 0; ctx->stats = ctx->stats_tail = NULL; ctx->class_decls = NULL; diff --git a/dlls/vbscript/tests/lang.vbs b/dlls/vbscript/tests/lang.vbs index 89804bdbd97..901d217db56 100644 --- a/dlls/vbscript/tests/lang.vbs +++ b/dlls/vbscript/tests/lang.vbs @@ -991,6 +991,61 @@ End Sub TestSubParenExpr (2) * 8, 7 TestSubParenExpr 8 * (2), 7 +Sub TestSubParenExprAdd(a, b) + Call ok(a=6, "a = " & a) + Call ok(b=7, "b = " & b) +End Sub + +TestSubParenExprAdd (2) + 4, 7 +TestSubParenExprAdd 4 + (2), 7 + +Sub TestSubParenExprNoSpace(a) + Call ok(a=6, "a = " & a) +End Sub + +TestSubParenExprNoSpace(10 \ 2) + 1 + +' Regression test: function call with space before ( in expression context +' e.g. x = (CInt (2) + 1) * 3 must parse and evaluate correctly +x = CInt (2) + 1 +Call ok(x = 3, "CInt (2) + 1 = " & x) +x = (CInt (2) + 1) * 3 +Call ok(x = 9, "(CInt (2) + 1) * 3 = " & x) + +' Regression test: function call with space before ( and * in expression context +' e.g. x = CInt (2) * 3 must treat CInt (2) as a function call, not expression grouping +x = CInt (2) * 3 +Call ok(x = 6, "CInt (2) * 3 = " & x) + +' Test member expression in statement context: obj.Method (x) * y, z +Class TestObjParenExpr + Sub Check(a, b) + Call ok(a=16, "obj a = " & a) + Call ok(b=7, "obj b = " & b) + End Sub +End Class + +Dim objParenExpr +Set objParenExpr = New TestObjParenExpr +objParenExpr.Check (2) * 8, 7 + +Sub TestSubParenExprConcat(a, b) + Call ok(a="helloworld", "a = " & a) + Call ok(b=7, "b = " & b) +End Sub + +TestSubParenExprConcat ("hello") & "world", 7 + +' Test: function call as argument with & after paren must be a call, not grouping +' e.g. TestSub Mid ("hello", 2) & "x" should call TestSub with "ellox" +' Mid("hello", 2) returns "ello", & "x" concatenates to "ellox" +Sub TestSubArgCallConcat(a) + Call ok(a="ellox", "a = " & a) +End Sub + +TestSubArgCallConcat Mid ("hello", 2) & "x" + + Sub TestSubLocalVal x = false Call ok(not x, "local x is not false?") -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/lex.c | 67 ++++++++++++++++++++---------------- dlls/vbscript/parse.h | 1 + dlls/vbscript/parser.y | 1 + dlls/vbscript/tests/lang.vbs | 36 +++++++++++++++++++ 4 files changed, 76 insertions(+), 29 deletions(-) diff --git a/dlls/vbscript/lex.c b/dlls/vbscript/lex.c index ef69d31cd53..670010363ae 100644 --- a/dlls/vbscript/lex.c +++ b/dlls/vbscript/lex.c @@ -575,36 +575,45 @@ int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx) ctx->last_nl = ctx->ptr-ctx->code; } - /* Update is_statement_ctx: TRUE at the start of a statement, persists - * through dotted member chains (e.g. 'obj.prop.method'), FALSE - * once we see an operator or other expression token. - * An identifier keeps the state only if it follows a statement-start - * token or a dot (member chain); otherwise it's an argument. */ - switch(ret) { - case tNL: - case ':': - case tTHEN: - case tELSE: - case tELSEIF: - ctx->is_statement_ctx = TRUE; - break; - case tDOT: - case '.': - /* Dots in member chains keep current state. */ - break; - case tIdentifier: - /* First identifier after statement start or after a dot keeps TRUE. - * An identifier after another identifier (e.g. 'Sub Arg') means - * we're in argument context, so set to FALSE. */ - if(ctx->last_token != '.' && ctx->last_token != tDOT - && ctx->last_token != tNL && ctx->last_token != ':' - && ctx->last_token != tTHEN && ctx->last_token != tELSE - && ctx->last_token != tELSEIF) + /* Track paren depth and update is_statement_ctx only at depth 0. + * Tokens inside parentheses (e.g. the 'idx' in 'obj(idx).method') + * must not affect statement context tracking. */ + if(ret == '(' || ret == tEXPRLBRACKET) + ctx->paren_depth++; + else if(ret == ')' || ret == tEMPTYBRACKETS) { + if(ctx->paren_depth > 0) + ctx->paren_depth--; + } + + if(ctx->paren_depth == 0) { + switch(ret) { + case tNL: + case ':': + case tTHEN: + case tELSE: + case tELSEIF: + ctx->is_statement_ctx = TRUE; + break; + case tDOT: + case '.': + case ')': + case tEMPTYBRACKETS: + /* Dots and closing parens in member chains keep current state. */ + break; + case tIdentifier: + /* First identifier after statement start or after a dot keeps TRUE. + * An identifier after another identifier (e.g. 'Sub Arg') means + * we're in argument context, so set to FALSE. */ + if(ctx->last_token != '.' && ctx->last_token != tDOT + && ctx->last_token != tNL && ctx->last_token != ':' + && ctx->last_token != tTHEN && ctx->last_token != tELSE + && ctx->last_token != tELSEIF) + ctx->is_statement_ctx = FALSE; + break; + default: ctx->is_statement_ctx = FALSE; - break; - default: - ctx->is_statement_ctx = FALSE; - break; + break; + } } return (ctx->last_token = ret); } diff --git a/dlls/vbscript/parse.h b/dlls/vbscript/parse.h index df9243a3c13..b9fdcfb843a 100644 --- a/dlls/vbscript/parse.h +++ b/dlls/vbscript/parse.h @@ -302,6 +302,7 @@ typedef struct { int last_token; BOOL is_statement_ctx; + int paren_depth; unsigned last_nl; statement_t *stats; diff --git a/dlls/vbscript/parser.y b/dlls/vbscript/parser.y index cdf5f5901bb..4bc321e47c9 100644 --- a/dlls/vbscript/parser.y +++ b/dlls/vbscript/parser.y @@ -1232,6 +1232,7 @@ HRESULT parse_script(parser_ctx_t *ctx, const WCHAR *code, const WCHAR *delimite ctx->error_loc = -1; ctx->last_token = tNL; ctx->is_statement_ctx = TRUE; + ctx->paren_depth = 0; ctx->last_nl = 0; ctx->stats = ctx->stats_tail = NULL; ctx->class_decls = NULL; diff --git a/dlls/vbscript/tests/lang.vbs b/dlls/vbscript/tests/lang.vbs index 901d217db56..b6d084ec2a5 100644 --- a/dlls/vbscript/tests/lang.vbs +++ b/dlls/vbscript/tests/lang.vbs @@ -1045,6 +1045,23 @@ End Sub TestSubArgCallConcat Mid ("hello", 2) & "x" +' Test: obj(idx).method (expr) * val, y in statement context +' The (expr) after .method must be expression grouping, not call paren +Class TestIndexedObjParenExpr + Public arr_(1) + Public Sub Init() + Set arr_(0) = New TestObjParenExpr + End Sub + Public Default Property Get Item(idx) + Set Item = arr_(idx) + End Property +End Class + +Dim idxObj +Set idxObj = New TestIndexedObjParenExpr +Call idxObj.Init() +idxObj(0).Check (2) * 8, 7 + Sub TestSubLocalVal x = false @@ -1442,6 +1459,25 @@ Call obj.test(obj) Call ok(getVT(test) = "VT_DISPATCH", "getVT(test) = " & getVT(test)) Call ok(Me is Test, "Me is not Test") +' Me(idx) should parse as accessing the default property of Me +' https://bugs.winehq.org/show_bug.cgi?id=58248 +'Class TestMeIndex +' Private arr_(1) +' Public Default Property Get Item(idx) +' Item = arr_(idx) +' End Property +' Public Sub SetVal(idx, val) +' arr_(idx) = val +' End Sub +' Public Sub TestAccess() +' SetVal 0, "hello" +' Call ok(Me(0) = "hello", "Me(0) = " & Me(0)) +' End Sub +'End Class +' +'Set obj = New TestMeIndex +'Call obj.TestAccess() + Const c1 = 1, c2 = 2, c3 = -3 Private Const c4 = 4 Public Const c5 = 5 -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/tests/lang.vbs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/dlls/vbscript/tests/lang.vbs b/dlls/vbscript/tests/lang.vbs index b6d084ec2a5..903b431e641 100644 --- a/dlls/vbscript/tests/lang.vbs +++ b/dlls/vbscript/tests/lang.vbs @@ -1459,25 +1459,6 @@ Call obj.test(obj) Call ok(getVT(test) = "VT_DISPATCH", "getVT(test) = " & getVT(test)) Call ok(Me is Test, "Me is not Test") -' Me(idx) should parse as accessing the default property of Me -' https://bugs.winehq.org/show_bug.cgi?id=58248 -'Class TestMeIndex -' Private arr_(1) -' Public Default Property Get Item(idx) -' Item = arr_(idx) -' End Property -' Public Sub SetVal(idx, val) -' arr_(idx) = val -' End Sub -' Public Sub TestAccess() -' SetVal 0, "hello" -' Call ok(Me(0) = "hello", "Me(0) = " & Me(0)) -' End Sub -'End Class -' -'Set obj = New TestMeIndex -'Call obj.TestAccess() - Const c1 = 1, c2 = 2, c3 = -3 Private Const c4 = 4 Public Const c5 = 5 -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/parser.y | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dlls/vbscript/parser.y b/dlls/vbscript/parser.y index 4bc321e47c9..b89f5c059b7 100644 --- a/dlls/vbscript/parser.y +++ b/dlls/vbscript/parser.y @@ -756,9 +756,7 @@ static call_expression_t *make_call_expression(parser_ctx_t *ctx, expression_t * return call_expr; if(arguments->type != EXPR_NOARG) { - /* Handle 'f (x) + expr, ...' in statement context: the parser saw f(x) as - * a call with arg x, and '+ expr' as a unary-plus expression. Windows VBScript - * treats this as f called with (x) + expr as first arg, followed by remaining args. */ + /* 'f (x) + expr, ...' — combine bracketed arg with the +/- expression. */ expression_t *remaining = arguments->next; call_expr->args = new_binary_expression(ctx, EXPR_ADD, call_expr->args, arguments); if(!call_expr->args) @@ -1088,7 +1086,7 @@ static function_decl_t *new_function_decl(parser_ctx_t *ctx, const WCHAR *name, decl->args = arg_decl; decl->body = body; decl->next = NULL; - decl->next_prop_func = NULL; + devbscript: more parentheses related parser fixescl->next_prop_func = NULL; return decl; } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/parser.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlls/vbscript/parser.y b/dlls/vbscript/parser.y index b89f5c059b7..705e5d37308 100644 --- a/dlls/vbscript/parser.y +++ b/dlls/vbscript/parser.y @@ -1086,7 +1086,7 @@ static function_decl_t *new_function_decl(parser_ctx_t *ctx, const WCHAR *name, decl->args = arg_decl; decl->body = body; decl->next = NULL; - devbscript: more parentheses related parser fixescl->next_prop_func = NULL; + decl->next_prop_func = NULL; return decl; } -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
participants (2)
-
Francis De Brabandere -
Francis De Brabandere (@francisdb)