Huw Davies : riched20: Fix URL auto-detection to handle 'neutral' characters.

1 Jun 2016

Module: wine
Branch: master
Commit: ea1e44e4f698b278e3e980914188f414630bfc31
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=ea1e44e4f698b278e3e9809141...
Author: Huw Davies huw@codeweavers.com
Date:   Wed Jun  1 12:35:37 2016 +0100
riched20: Fix URL auto-detection to handle 'neutral' characters.
Signed-off-by: Huw Davies huw@codeweavers.com
Signed-off-by: Alexandre Julliard julliard@winehq.org
---
dlls/riched20/editor.c       | 124 ++++++++++++++++++++-----------------------
 dlls/riched20/tests/editor.c |  77 +++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 65 deletions(-)

diff --git a/dlls/riched20/editor.c b/dlls/riched20/editor.c
index 85395e7..d4ca570 100644
--- a/dlls/riched20/editor.c
+++ b/dlls/riched20/editor.c
@@ -5005,10 +5005,24 @@ LRESULT WINAPI REExtendedRegisterClass(void)
   return result;
 }
-static BOOL isurlspecial(WCHAR c)
+static int wchar_comp( const void *key, const void *elem )
 {
-  static const WCHAR special_chars[] = {'.','/','%','@','*','|','\','+','#',0};
-  return strchrW( special_chars, c ) != NULL;
+    return *(const WCHAR *)key - *(const WCHAR *)elem;
+}
+
+/* neutral characters end the url if the next non-neutral character is a space character,
+   otherwise they are included in the url. */
+static BOOL isurlneutral( WCHAR c )
+{
+    /* NB this list is sorted */
+    static const WCHAR neutral_chars[] = {'!','"',''','(',')',',','-','.',':',';','<','>','?','[',']','{','}'};
+
+    /* Some shortcuts */
+    if (isalnum( c )) return FALSE;
+    if (c > neutral_chars[sizeof(neutral_chars) / sizeof(neutral_chars[0]) - 1]) return FALSE;
+
+    return !!bsearch( &c, neutral_chars, sizeof(neutral_chars) / sizeof(neutral_chars[0]),
+                      sizeof(c), wchar_comp );
 }
/**
@@ -5024,87 +5038,67 @@ static BOOL ME_FindNextURLCandidate(ME_TextEditor *editor,
                                     ME_Cursor *candidate_min,
                                     ME_Cursor *candidate_max)
 {
-  ME_Cursor cursor = *start;
-  BOOL foundColon = FALSE;
+  ME_Cursor cursor = *start, neutral_end;
   BOOL candidateStarted = FALSE;
-  WCHAR lastAcceptedChar = '\0';
+  WCHAR c;
while (nChars > 0)
   {
-    WCHAR *strStart = get_text( &cursor.pRun->member.run, 0 );
-    WCHAR *str = strStart + cursor.nOffset;
-    int nLen = cursor.pRun->member.run.len - cursor.nOffset;
-    nChars -= nLen;
+    WCHAR *str = get_text( &cursor.pRun->member.run, 0 );
+    int run_len = cursor.pRun->member.run.len;
+
+    nChars -= run_len - cursor.nOffset;
-    if (~cursor.pRun->member.run.nFlags & MERF_ENDPARA)
+    /* Find start of candidate */
+    if (!candidateStarted)
     {
-      /* Find start of candidate */
-      if (!candidateStarted)
+      while (cursor.nOffset < run_len)
       {
-        while (nLen)
+        c = str[cursor.nOffset];
+        if (!isspaceW( c ) && !isurlneutral( c ))
         {
-          nLen--;
-          if (isalnumW(*str) || isurlspecial(*str))
-          {
-            cursor.nOffset = str - strStart;
-            *candidate_min = cursor;
-            candidateStarted = TRUE;
-            lastAcceptedChar = *str++;
-            break;
-          }
-          str++;
+          *candidate_min = cursor;
+          candidateStarted = TRUE;
+          neutral_end.pPara = NULL;
+          cursor.nOffset++;
+          break;
         }
+        cursor.nOffset++;
       }
+    }
-      /* Find end of candidate */
-      if (candidateStarted) {
-        while (nLen)
+    /* Find end of candidate */
+    if (candidateStarted)
+    {
+      while (cursor.nOffset < run_len)
+      {
+        c = str[cursor.nOffset];
+        if (isspaceW( c ))
+          goto done;
+        else if (isurlneutral( c ))
         {
-          nLen--;
-          if (*str == ':' && !foundColon) {
-            foundColon = TRUE;
-          } else if (!isalnumW(*str) && !isurlspecial(*str)) {
-            cursor.nOffset = str - strStart;
-            if (lastAcceptedChar == ':')
-              ME_MoveCursorChars(editor, &cursor, -1);
-            *candidate_max = cursor;
-            return TRUE;
-          }
-          lastAcceptedChar = *str++;
+          if (!neutral_end.pPara)
+            neutral_end = cursor;
         }
-      }
-    } else {
-      /* End of paragraph: skip it if before candidate span, or terminates
-         current active span */
-      if (candidateStarted) {
-        if (lastAcceptedChar == ':')
-          ME_MoveCursorChars(editor, &cursor, -1);
-        *candidate_max = cursor;
-        return TRUE;
-      }
-    }
+        else
+          neutral_end.pPara = NULL;
-    /* Reaching this point means no span was found, so get next span */
-    if (!ME_NextRun(&cursor.pPara, &cursor.pRun)) {
-      if (candidateStarted) {
-        /* There are no further runs, so take end of text as end of candidate */
-        cursor.nOffset = str - strStart;
-        if (lastAcceptedChar == ':')
-          ME_MoveCursorChars(editor, &cursor, -1);
-        *candidate_max = cursor;
-        return TRUE;
+        cursor.nOffset++;
       }
-      *candidate_max = *candidate_min = cursor;
-      return FALSE;
     }
+
     cursor.nOffset = 0;
+    if (!ME_NextRun(&cursor.pPara, &cursor.pRun))
+      goto done;
   }
-  if (candidateStarted) {
-    /* There are no further runs, so take end of text as end of candidate */
-    if (lastAcceptedChar == ':')
-      ME_MoveCursorChars(editor, &cursor, -1);
-    *candidate_max = cursor;
+done:
+  if (candidateStarted)
+  {
+    if (neutral_end.pPara)
+      *candidate_max = neutral_end;
+    else
+      *candidate_max = cursor;
     return TRUE;
   }
   *candidate_max = *candidate_min = cursor;
diff --git a/dlls/riched20/tests/editor.c b/dlls/riched20/tests/editor.c
index fd00487..2fd9c99 100644
--- a/dlls/riched20/tests/editor.c
+++ b/dlls/riched20/tests/editor.c
@@ -1781,6 +1781,7 @@ static void test_EM_AUTOURLDETECT(void)
     "This is some text with #X on it",
     "This is some text with @X on it",
     "This is some text with \X on it",
+    "This is some text with _X on it",
   };
   /* All of these cause the URL detection to be extended by one more byte,
      thus demonstrating that the tested character is considered as part
@@ -1794,6 +1795,33 @@ static void test_EM_AUTOURLDETECT(void)
     "This is some text with X# on it",
     "This is some text with X@ on it",
     "This is some text with X\ on it",
+    "This is some text with X_ on it",
+  };
+  /* These delims act as neutral breaks.  Whether the url is ended
+     or not depends on the next non-neutral character.  We'll test
+     with Y unchanged, in which case the url should include the
+     deliminator and the Y.  We'll also test with the Y changed
+     to a space, in which case the url stops before the
+     deliminator. */
+  const char * templates_neutral_delim[] = {
+    "This is some text with X-Y on it",
+    "This is some text with X--Y on it",
+    "This is some text with X!Y on it",
+    "This is some text with X[Y on it",
+    "This is some text with X]Y on it",
+    "This is some text with X{Y on it",
+    "This is some text with X}Y on it",
+    "This is some text with X(Y on it",
+    "This is some text with X)Y on it",
+    "This is some text with X"Y on it",
+    "This is some text with X;Y on it",
+    "This is some text with X:Y on it",
+    "This is some text with X'Y on it",
+    "This is some text with X?Y on it",
+    "This is some text with X<Y on it",
+    "This is some text with X>Y on it",
+    "This is some text with X.Y on it",
+    "This is some text with X,Y on it",
   };
   char buffer[1024];
@@ -1974,6 +2002,55 @@ static void test_EM_AUTOURLDETECT(void)
       }
     }
+    for (j = 0; j < sizeof(templates_neutral_delim) / sizeof(const char *); j++) {
+      char * at_pos, * end_pos;
+      int at_offset;
+      int end_offset;
+
+      if (!urls[i].is_url) continue;
+
+      at_pos = strchr(templates_neutral_delim[j], 'X');
+      at_offset = at_pos - templates_neutral_delim[j];
+      memcpy(buffer, templates_neutral_delim[j], at_offset);
+      buffer[at_offset] = '\0';
+      strcat(buffer, urls[i].text);
+      strcat(buffer, templates_neutral_delim[j] + at_offset + 1);
+
+      end_pos = strchr(buffer, 'Y');
+      end_offset = end_pos - buffer;
+
+      SendMessageA(hwndRichEdit, EM_AUTOURLDETECT, TRUE, 0);
+      SendMessageA(hwndRichEdit, WM_SETTEXT, 0, (LPARAM)buffer);
+
+      /* This assumes no templates start with the URL itself, and that they
+         have at least two characters before the URL text */
+      ok(!check_CFE_LINK_selection(hwndRichEdit, 0, 1),
+         "CFE_LINK incorrectly set in (%d-%d), text: %s\n", 0, 1, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, at_offset -2, at_offset -1),
+         "CFE_LINK incorrectly set in (%d-%d), text: %s\n", at_offset -2, at_offset -1, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, at_offset -1, at_offset),
+         "CFE_LINK incorrectly set in (%d-%d), text: %s\n", at_offset -1, at_offset, buffer);
+
+      ok(check_CFE_LINK_selection(hwndRichEdit, at_offset, at_offset +1),
+         "CFE_LINK not set in (%d-%d), text: %s\n", at_offset, at_offset +1, buffer);
+      ok(check_CFE_LINK_selection(hwndRichEdit, end_offset -1, end_offset),
+         "CFE_LINK not set in (%d-%d), text: %s\n", end_offset -1, end_offset, buffer);
+      ok(check_CFE_LINK_selection(hwndRichEdit, end_offset, end_offset +1),
+         "CFE_LINK not set in (%d-%d), text: %s\n", end_offset, end_offset +1, buffer);
+
+      *end_pos = ' ';
+
+      SendMessageA(hwndRichEdit, EM_AUTOURLDETECT, TRUE, 0);
+      SendMessageA(hwndRichEdit, WM_SETTEXT, 0, (LPARAM)buffer);
+
+      ok(check_CFE_LINK_selection(hwndRichEdit, at_offset, at_offset +1),
+         "CFE_LINK not set in (%d-%d), text: %s\n", at_offset, at_offset +1, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, end_offset -1, end_offset),
+         "CFE_LINK set in (%d-%d), text: %s\n", end_offset -1, end_offset, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, end_offset, end_offset +1),
+         "CFE_LINK set in (%d-%d), text: %s\n", end_offset, end_offset +1, buffer);
+    }
+
     DestroyWindow(hwndRichEdit);
     hwndRichEdit = NULL;
   }

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

2003

2002

2001

Huw Davies : riched20: Fix URL auto-detection to handle 'neutral' characters.