-- v2: findstr: Support search with regular expressions.
From: Hans Leidekker <hans@codeweavers.com>
--- programs/findstr/main.c | 64 +++++++++++++++++++++++++++++--- programs/findstr/tests/findstr.c | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 6 deletions(-)
diff --git a/programs/findstr/main.c b/programs/findstr/main.c index f8f2de594c5..3e493bb3c71 100644 --- a/programs/findstr/main.c +++ b/programs/findstr/main.c @@ -97,9 +97,9 @@ static void add_string(struct findstr_string **head, const WCHAR *string) *ptr = new_string; }
-static const WCHAR *substring(const WCHAR *str, const WCHAR *substr, BOOL case_sensitive) +static BOOL match_substring(const WCHAR *str, const WCHAR *substr, BOOL case_sensitive) { - if (case_sensitive) return wcsstr(str, substr); + if (case_sensitive) return !!wcsstr(str, substr);
while (*str) { @@ -110,11 +110,53 @@ static const WCHAR *substring(const WCHAR *str, const WCHAR *substr, BOOL case_s p1++; p2++; } - if (!*p2) return str; + if (!*p2) return TRUE; str++; }
- return NULL; + return FALSE; +} + +static inline BOOL is_op(WCHAR c, const WCHAR *regexp, UINT pos) +{ + if (!pos) return (*regexp == c); + return (regexp[pos] == c && regexp[pos - 1] != '\'); +} + +static inline BOOL match_char(WCHAR c1, WCHAR c2, BOOL case_sensitive) +{ + if (case_sensitive) return c1 == c2; + return towlower(c1) == towlower(c2); +} + +static BOOL match_star(WCHAR, const WCHAR *, const WCHAR *, UINT, BOOL); + +static BOOL match_here(const WCHAR *str, const WCHAR *regexp, UINT pos, BOOL case_sensitive) +{ + if (regexp[pos] == '\' && regexp[pos + 1]) pos++; + if (!regexp[pos]) return TRUE; + if (is_op('*', regexp, pos + 1)) return match_star(regexp[pos], str, regexp, pos + 2, case_sensitive); + if (is_op('$', regexp, pos) && !regexp[pos + 1]) return (*str == '\n'); + if (*str && (is_op('.', regexp, pos) || match_char(*str, regexp[pos], case_sensitive))) + return match_here(str + 1, regexp, pos + 1, case_sensitive); + return FALSE; +} + +static BOOL match_star(WCHAR c, const WCHAR *str, const WCHAR *regexp, UINT pos, BOOL case_sensitive) +{ + do { if (match_here(str, regexp, pos, case_sensitive)) return TRUE; } + while (*str && (match_char(*str++, c, case_sensitive) || c == '.')); + return FALSE; +} + +static BOOL match_regexp(const WCHAR *str, const WCHAR *regexp, BOOL case_sensitive) +{ + if (wcsstr(regexp, L"[")) FIXME("character ranges (i.e. [abc], [^a-z]) are not supported\n"); + if (wcsstr(regexp, L"\<") || wcsstr(regexp, L"\>")) FIXME("word position (i.e. \< and \>) not supported\n"); + + if (regexp[0] == '^') return match_here(str, regexp, 1, case_sensitive); + do { if (match_here(str, regexp, 0, case_sensitive)) return TRUE; } while (*str++); + return FALSE; }
int __cdecl wmain(int argc, WCHAR *argv[]) @@ -122,7 +164,7 @@ int __cdecl wmain(int argc, WCHAR *argv[]) struct findstr_string *string_head = NULL, *current_string, *next_string; struct findstr_file *file_head = NULL, *current_file, *next_file; WCHAR *string, *ptr, *buffer, line[MAXSTRING]; - BOOL has_string = FALSE, has_file = FALSE, case_sensitive = TRUE; + BOOL has_string = FALSE, has_file = FALSE, case_sensitive = TRUE, regular_expression = FALSE; int ret = 1, i, j;
for (i = 0; i < argc; i++) @@ -175,6 +217,10 @@ int __cdecl wmain(int argc, WCHAR *argv[]) case 'i': case_sensitive = FALSE; break; + case 'R': + case 'r': + regular_expression = TRUE; + break; default: findstr_error_wprintf(STRING_IGNORED, argv[i][j]); break; @@ -220,7 +266,13 @@ int __cdecl wmain(int argc, WCHAR *argv[]) current_string = string_head; while (current_string) { - if (substring(line, current_string->string, case_sensitive)) + BOOL match; + + if (regular_expression) + match = match_regexp(line, current_string->string, case_sensitive); + else + match = match_substring(line, current_string->string, case_sensitive); + if (match) { wprintf(line); if (current_file->file == stdin) diff --git a/programs/findstr/tests/findstr.c b/programs/findstr/tests/findstr.c index 3ededf7551b..15ce8108b41 100644 --- a/programs/findstr/tests/findstr.c +++ b/programs/findstr/tests/findstr.c @@ -201,6 +201,70 @@ static void test_basic(void) ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); ret = strcmp(stdout_buffer, "abc"); ok(!ret, "Got the wrong result.\n"); + + /* find string in file, regular expression */ + run_find_file("/R abc", "abc", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + ret = strcmp(stdout_buffer, "abc"); + ok(!ret, "Got the wrong result.\n"); + + /* find string in file with /C:, regular expression */ + run_find_file("/R /C:^abc", "abc", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + ret = strcmp(stdout_buffer, "abc"); + ok(!ret, "Got the wrong result.\n"); + + /* find string in file, regular expression, case insensitive */ + run_find_file("/I /R /C:.Bc", "abc", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", 
stderr_size); + ret = strcmp(stdout_buffer, "abc"); + ok(!ret, "Got the wrong result.\n"); + + /* find string in file, regular expression, escape */ + run_find_file("/R /C:\.bc", "abc", 1); + ok(stdout_size == 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + + /* $ doesn't match if there's no newline */ + run_find_file("/R /C:abc$", "abc", 1); + ok(stdout_size == 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + + run_find_file("/R /C:abc$", "abc\r\n", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + ret = strcmp(stdout_buffer, "abc\r\n"); + ok(!ret, "Got the wrong result. '%s'\n", stdout_buffer); + + /* escaped . before * */ + run_find_file("/R /C:\.*", "...", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + ret = strcmp(stdout_buffer, "..."); + ok(!ret, "Got the wrong result. '%s'\n", stdout_buffer); + + run_find_file("/R /C:\.*", "abc", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + ret = strcmp(stdout_buffer, "abc"); + ok(!ret, "Got the wrong result. '%s'\n", stdout_buffer); + + /* ^ after first character */ + run_find_file("/R /C:a^bc", "a^bc", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + ret = strcmp(stdout_buffer, "a^bc"); + ok(!ret, "Got the wrong result. 
'%s'\n", stdout_buffer); + + /* $ before last character */ + run_find_file("/R /C:ab$c", "ab$c", 0); + ok(stdout_size > 0, "Unexpected stdout buffer size %ld.\n", stdout_size); + ok(stderr_size == 0, "Unexpected stderr buffer size %ld.\n", stderr_size); + ret = strcmp(stdout_buffer, "ab$c"); + ok(!ret, "Got the wrong result. '%s'\n", stdout_buffer); }
START_TEST(findstr)
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=148733
Your paranoid android.
=== debian11b (64 bit WoW report) ===
d3d9: d3d9ex.c:3230: Test failed: Expected message 0x18 for window 0, but didn't receive it, i=0. d3d9ex.c:3236: Test failed: Got unexpected WINDOWPOS hwnd=0000000000000000, x=0, y=0, cx=0, cy=0, flags=0
Thanks for adding the tests. Feel free to resolve threads above if they indeed pass.
On Fri Sep 27 13:55:48 2024 +0000, Jinoh Kang wrote:
if (wcsstr(regexp + 1, L"^")) FIXME("metacharacter '^' found after first char\n"); if (wcscspn(regexp, L"$") + 1 < wcslen(regexp)) FIXME("metacharacter '$' found before last char\n");
(To implement these FIXMEs, we should never match regexes like `$nevermatches^`, but we should still match `$.*`; this is untested.)
I've added some test cases and they happen to pass already. Can you check that I did it right?