[PATCH v2 0/2] MR10244: Draft: vbscript: fix sub first arg parentheses handling
Closes https://bugs.winehq.org/show_bug.cgi?id=54177 -- v2: vbscript: more parentheses related parser fixes https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/lex.c | 34 ++++++++++++++++++++++++++++++++-- dlls/vbscript/tests/lang.vbs | 8 ++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/dlls/vbscript/lex.c b/dlls/vbscript/lex.c index 8c5c69ea429..a7e8330bbb8 100644 --- a/dlls/vbscript/lex.c +++ b/dlls/vbscript/lex.c @@ -450,7 +450,8 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) return comment_line(ctx); ctx->ptr++; return '-'; - case '(': + case '(': { + const WCHAR *paren_pos = ctx->ptr; /* NOTE: * We resolve empty brackets in lexer instead of parser to avoid complex conflicts * in call statement special case |f()| without 'call' keyword @@ -464,10 +465,39 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) /* * Parser can't predict if bracket is part of argument expression or an argument * in call expression. We predict it here instead. + * + * In VBScript, 'f(x)' treats parens as call argument parens, but in a statement + * like 'f (x) * 8, y' the parens are expression grouping. We detect this by + * checking for whitespace before '(' and then looking past the matching ')' for + * an operator that cannot start an expression (*, /, \, ^). Such an operator + * after ')' means the '(' must be expression grouping. */ - if(ctx->last_token == tIdentifier || ctx->last_token == ')') + if(ctx->last_token == tIdentifier || ctx->last_token == ')') { + if(paren_pos > ctx->code && (paren_pos[-1] == ' ' || paren_pos[-1] == '\t')) { + const WCHAR *p = ctx->ptr; + int depth = 1; + + while(p < ctx->end && depth > 0) { + if(*p == '(') + depth++; + else if(*p == ')') + depth--; + if(depth > 0) + p++; + } + + if(depth == 0) { + p++; + while(p < ctx->end && (*p == ' ' || *p == '\t')) + p++; + if(p < ctx->end && (*p == '*' || *p == '/' || *p == '\\' || *p == '^')) + return tEXPRLBRACKET; + } + } return '('; + } return tEXPRLBRACKET; + } case '"': return parse_string_literal(ctx, lval); case '#': diff --git a/dlls/vbscript/tests/lang.vbs b/dlls/vbscript/tests/lang.vbs index 0025bfeddcf..89804bdbd97 100644 --- a/dlls/vbscript/tests/lang.vbs +++ b/dlls/vbscript/tests/lang.vbs @@ -983,6 +983,14 @@ Call TestSubExit2 TestSubMultiArgs 1, 2, 3, 4, 5 Call TestSubMultiArgs(1, 2, 3, 4, 5) +Sub TestSubParenExpr(a, b) + Call ok(a=16, "a = " & a) + Call ok(b=7, "b = " & b) +End Sub + +TestSubParenExpr (2) * 8, 7 +TestSubParenExpr 8 * (2), 7 + Sub TestSubLocalVal x = false Call ok(not x, "local x is not false?") -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
From: Francis De Brabandere <francisdb@gmail.com> --- dlls/vbscript/lex.c | 44 ++++++++++++++++++++++++----- dlls/vbscript/parse.h | 1 + dlls/vbscript/parser.y | 13 +++++++-- dlls/vbscript/tests/lang.vbs | 55 ++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 10 deletions(-) diff --git a/dlls/vbscript/lex.c b/dlls/vbscript/lex.c index a7e8330bbb8..ef69d31cd53 100644 --- a/dlls/vbscript/lex.c +++ b/dlls/vbscript/lex.c @@ -466,14 +466,13 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) * Parser can't predict if bracket is part of argument expression or an argument * in call expression. We predict it here instead. * - * In VBScript, 'f(x)' treats parens as call argument parens, but in a statement - * like 'f (x) * 8, y' the parens are expression grouping. We detect this by - * checking for whitespace before '(' and then looking past the matching ')' for - * an operator that cannot start an expression (*, /, \, ^). Such an operator - * after ')' means the '(' must be expression grouping. + * In statement context, 'f (x) * 8, y' treats (x) as expression grouping, + * not call parens. Detect this when: identifier/')' precedes '(' with a space, + * and a binary-only operator (*, /, \, ^, &) follows the matching ')'. */ if(ctx->last_token == tIdentifier || ctx->last_token == ')') { - if(paren_pos > ctx->code && (paren_pos[-1] == ' ' || paren_pos[-1] == '\t')) { + if(paren_pos > ctx->code && (paren_pos[-1] == ' ' || paren_pos[-1] == '\t') + && ctx->is_statement_ctx) { const WCHAR *p = ctx->ptr; int depth = 1; @@ -490,7 +489,7 @@ static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx) p++; while(p < ctx->end && (*p == ' ' || *p == '\t')) p++; - if(p < ctx->end && (*p == '*' || *p == '/' || *p == '\\' || *p == '^')) + if(p < ctx->end && (*p == '*' || *p == '/' || *p == '\\' || *p == '^' || *p == '&')) return tEXPRLBRACKET; } } @@ -576,5 +575,36 @@ int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx) ctx->last_nl = ctx->ptr-ctx->code; } + /* Update is_statement_ctx: TRUE at the start of a statement, persists + * through dotted member chains (e.g. 'obj.prop.method'), FALSE + * once we see an operator or other expression token. + * An identifier keeps the state only if it follows a statement-start + * token or a dot (member chain); otherwise it's an argument. */ + switch(ret) { + case tNL: + case ':': + case tTHEN: + case tELSE: + case tELSEIF: + ctx->is_statement_ctx = TRUE; + break; + case tDOT: + case '.': + /* Dots in member chains keep current state. */ + break; + case tIdentifier: + /* First identifier after statement start or after a dot keeps TRUE. + * An identifier after another identifier (e.g. 'Sub Arg') means + * we're in argument context, so set to FALSE. */ + if(ctx->last_token != '.' && ctx->last_token != tDOT + && ctx->last_token != tNL && ctx->last_token != ':' + && ctx->last_token != tTHEN && ctx->last_token != tELSE + && ctx->last_token != tELSEIF) + ctx->is_statement_ctx = FALSE; + break; + default: + ctx->is_statement_ctx = FALSE; + break; + } return (ctx->last_token = ret); } diff --git a/dlls/vbscript/parse.h b/dlls/vbscript/parse.h index 64c3dee2a69..df9243a3c13 100644 --- a/dlls/vbscript/parse.h +++ b/dlls/vbscript/parse.h @@ -301,6 +301,7 @@ typedef struct { LCID lcid; int last_token; + BOOL is_statement_ctx; unsigned last_nl; statement_t *stats; diff --git a/dlls/vbscript/parser.y b/dlls/vbscript/parser.y index 20c30eed681..cdf5f5901bb 100644 --- a/dlls/vbscript/parser.y +++ b/dlls/vbscript/parser.y @@ -756,9 +756,15 @@ static call_expression_t *make_call_expression(parser_ctx_t *ctx, expression_t * return call_expr; if(arguments->type != EXPR_NOARG) { - FIXME("Invalid syntax: missing comma\n"); - ctx->hres = E_FAIL; - return NULL; + /* Handle 'f (x) + expr, ...' in statement context: the parser saw f(x) as + * a call with arg x, and '+ expr' as a unary-plus expression. Windows VBScript + * treats this as f called with (x) + expr as first arg, followed by remaining args. */ + expression_t *remaining = arguments->next; + call_expr->args = new_binary_expression(ctx, EXPR_ADD, call_expr->args, arguments); + if(!call_expr->args) + return NULL; + call_expr->args->next = remaining; + return call_expr; } call_expr->args->next = arguments->next; @@ -1225,6 +1231,7 @@ HRESULT parse_script(parser_ctx_t *ctx, const WCHAR *code, const WCHAR *delimite ctx->hres = S_OK; ctx->error_loc = -1; ctx->last_token = tNL; + ctx->is_statement_ctx = TRUE; ctx->last_nl = 0; ctx->stats = ctx->stats_tail = NULL; ctx->class_decls = NULL; diff --git a/dlls/vbscript/tests/lang.vbs b/dlls/vbscript/tests/lang.vbs index 89804bdbd97..901d217db56 100644 --- a/dlls/vbscript/tests/lang.vbs +++ b/dlls/vbscript/tests/lang.vbs @@ -991,6 +991,61 @@ End Sub TestSubParenExpr (2) * 8, 7 TestSubParenExpr 8 * (2), 7 +Sub TestSubParenExprAdd(a, b) + Call ok(a=6, "a = " & a) + Call ok(b=7, "b = " & b) +End Sub + +TestSubParenExprAdd (2) + 4, 7 +TestSubParenExprAdd 4 + (2), 7 + +Sub TestSubParenExprNoSpace(a) + Call ok(a=6, "a = " & a) +End Sub + +TestSubParenExprNoSpace(10 \ 2) + 1 + +' Regression test: function call with space before ( in expression context +' e.g. x = (CInt (2) + 1) * 3 must parse and evaluate correctly +x = CInt (2) + 1 +Call ok(x = 3, "CInt (2) + 1 = " & x) +x = (CInt (2) + 1) * 3 +Call ok(x = 9, "(CInt (2) + 1) * 3 = " & x) + +' Regression test: function call with space before ( and * in expression context +' e.g. x = CInt (2) * 3 must treat CInt (2) as a function call, not expression grouping +x = CInt (2) * 3 +Call ok(x = 6, "CInt (2) * 3 = " & x) + +' Test member expression in statement context: obj.Method (x) * y, z +Class TestObjParenExpr + Sub Check(a, b) + Call ok(a=16, "obj a = " & a) + Call ok(b=7, "obj b = " & b) + End Sub +End Class + +Dim objParenExpr +Set objParenExpr = New TestObjParenExpr +objParenExpr.Check (2) * 8, 7 + +Sub TestSubParenExprConcat(a, b) + Call ok(a="helloworld", "a = " & a) + Call ok(b=7, "b = " & b) +End Sub + +TestSubParenExprConcat ("hello") & "world", 7 + +' Test: function call as argument with & after paren must be a call, not grouping +' e.g. TestSub Mid ("hello", 2) & "x" should call TestSub with "ellox" +' Mid("hello", 2) returns "ello", & "x" concatenates to "ellox" +Sub TestSubArgCallConcat(a) + Call ok(a="ellox", "a = " & a) +End Sub + +TestSubArgCallConcat Mid ("hello", 2) & "x" + + Sub TestSubLocalVal x = false Call ok(not x, "local x is not false?") -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10244
participants (2)
-
Francis De Brabandere -
Francis De Brabandere (@francisdb)