Module: wine Branch: master Commit: d88aa37e59160ce5c6ca2be9dce30f7412c030b9 URL: http://source.winehq.org/git/wine.git/?a=commit;h=d88aa37e59160ce5c6ca2be9dce30f7412c030b9
Author: Piotr Caban piotr@codeweavers.com Date: Thu Jan 10 11:45:39 2013 +0100
msvcrt: Added UTF8 support to read function.
---
dlls/msvcrt/file.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 182 insertions(+), 0 deletions(-)
diff --git a/dlls/msvcrt/file.c b/dlls/msvcrt/file.c index e3a477f..c53e061 100644 --- a/dlls/msvcrt/file.c +++ b/dlls/msvcrt/file.c @@ -2103,6 +2103,185 @@ int CDECL MSVCRT__rmtmp(void) return num_removed; }
+/* Length in bytes of a UTF-8 sequence, judged only from its first byte:
+ * 11110xxx -> 4, 1110xxxx -> 3, 110xxxxx -> 2, anything else -> 1
+ * (so ASCII, continuation bytes and other lead bytes all count as 1).
+ */
+static inline int get_utf8_char_len(char ch)
+{
+    if((ch&0xf8) == 0xf0)
+        return 4;
+    else if((ch&0xf0) == 0xe0)
+        return 3;
+    else if((ch&0xe0) == 0xc0)
+        return 2;
+    return 1;
+}
+
+/*********************************************************************
+ * (internal) read_utf8
+ *
+ * Reads UTF-8 bytes from the handle behind fd and stores them in buf as
+ * wide characters.
+ *
+ * count is halved on entry and the result is used both as the capacity
+ * of buf in wide characters and as the size of the byte buffer to read.
+ *
+ * While copying, "\r\n" is collapsed to "\n" and a 0x1a byte ends the
+ * data and sets WX_ATEOF. Bytes of a trailing incomplete character are
+ * not converted: for WX_PIPE descriptors they are stored in
+ * fdinfo->lookahead, otherwise the file pointer is moved back over them.
+ * A lookahead slot holding '\n' is treated as empty.
+ *
+ * RETURNS
+ *  Twice the number of wide characters produced by MultiByteToWideChar,
+ *  0 when WX_ATEOF was set because nothing could be read (or, in the
+ *  small buffer case, because the first byte was 0x1a),
+ *  -1 on failure, after msvcrt_set_errno(GetLastError()).
+ */
+static int read_utf8(int fd, MSVCRT_wchar_t *buf, unsigned int count)
+{
+    ioinfo *fdinfo = msvcrt_get_ioinfo(fd);
+    HANDLE hand = fdinfo->handle;
+    char min_buf[4], *readbuf, lookahead;
+    DWORD readbuf_size, pos=0, num_read=1, char_len, i, j;
+
+    /* make the buffer big enough to hold at least one character */
+    /* read bytes have to fit to output and lookahead buffers */
+    count /= 2;
+    readbuf_size = count < 4 ? 4 : count;
+    /* use the stack buffer for tiny requests and when allocation fails;
+     * readbuf != min_buf therefore means "heap buffer that must be freed" */
+    if(readbuf_size<=4 || !(readbuf = MSVCRT_malloc(readbuf_size))) {
+        readbuf_size = 4;
+        readbuf = min_buf;
+    }
+
+    /* start with the bytes kept back by a previous call and mark
+     * each consumed lookahead slot as empty again */
+    if(fdinfo->lookahead[0] != '\n') {
+        readbuf[pos++] = fdinfo->lookahead[0];
+        fdinfo->lookahead[0] = '\n';
+
+        if(fdinfo->lookahead[1] != '\n') {
+            readbuf[pos++] = fdinfo->lookahead[1];
+            fdinfo->lookahead[1] = '\n';
+
+            if(fdinfo->lookahead[2] != '\n') {
+                readbuf[pos++] = fdinfo->lookahead[2];
+                fdinfo->lookahead[2] = '\n';
+            }
+        }
+    }
+
+    /* NOTE: this case is broken in native dll, reading
+     *        sometimes fails when small buffer is passed
+     */
+    if(count < 4) {
+        /* readbuf is always min_buf here (count < 4 forces readbuf_size
+         * to 4), so the returns in this branch have nothing to free */
+        if(!pos && !ReadFile(hand, readbuf, 1, &num_read, NULL)) {
+            if (GetLastError() == ERROR_BROKEN_PIPE) {
+                fdinfo->wxflag |= WX_ATEOF;
+                return 0;
+            }else {
+                msvcrt_set_errno(GetLastError());
+                return -1;
+            }
+        }else if(!num_read) {
+            fdinfo->wxflag |= WX_ATEOF;
+            return 0;
+        }else {
+            pos++;
+        }
+
+        /* fetch the remaining bytes of the character started by readbuf[0] */
+        char_len = get_utf8_char_len(readbuf[0]);
+        if(char_len>pos) {
+            if(ReadFile(hand, readbuf+pos, char_len-pos, &num_read, NULL))
+                pos += num_read;
+        }
+
+        if(readbuf[0] == '\n')
+            fdinfo->wxflag |= WX_READNL;
+        else
+            fdinfo->wxflag &= ~WX_READNL;
+
+        if(readbuf[0] == 0x1a) {
+            fdinfo->wxflag |= WX_ATEOF;
+            return 0;
+        }
+
+        if(readbuf[0] == '\r') {
+            /* peek at the next byte to decide between "\r\n" and a lone '\r' */
+            if(!ReadFile(hand, &lookahead, 1, &num_read, NULL) || num_read!=1)
+                buf[0] = '\r';
+            else if(lookahead == '\n')
+                buf[0] = '\n';
+            else {
+                buf[0] = '\r';
+                /* give the peeked byte back */
+                if(fdinfo->wxflag & WX_PIPE)
+                    fdinfo->lookahead[0] = lookahead;
+                else
+                    SetFilePointer(fdinfo->handle, -1, NULL, FILE_CURRENT);
+            }
+            return 2;
+        }
+
+        if(!(num_read = MultiByteToWideChar(CP_UTF8, 0, readbuf, pos, buf, count))) {
+            msvcrt_set_errno(GetLastError());
+            return -1;
+        }
+
+        return num_read*2;
+    }
+
+    /* From here on readbuf may be heap memory: every return below
+     * releases it first so the buffer is not leaked. */
+    if(!ReadFile(hand, readbuf+pos, readbuf_size-pos, &num_read, NULL)) {
+        if(pos) {
+            /* lookahead bytes are still pending, process those */
+            num_read = 0;
+        }else if(GetLastError() == ERROR_BROKEN_PIPE) {
+            fdinfo->wxflag |= WX_ATEOF;
+            if(readbuf != min_buf) MSVCRT_free(readbuf);
+            return 0;
+        }else {
+            msvcrt_set_errno(GetLastError());
+            if(readbuf != min_buf) MSVCRT_free(readbuf);
+            return -1;
+        }
+    }else if(!pos && !num_read) {
+        fdinfo->wxflag |= WX_ATEOF;
+        if(readbuf != min_buf) MSVCRT_free(readbuf);
+        return 0;
+    }
+
+    pos += num_read;
+    if(readbuf[0] == '\n')
+        fdinfo->wxflag |= WX_READNL;
+    else
+        fdinfo->wxflag &= ~WX_READNL;
+
+    /* Find first byte of last character (may be incomplete) */
+    for(i=pos-1; i>0 && i>pos-4; i--)
+        if((readbuf[i]&0xc0) != 0x80)
+            break;
+    /* if the last character is complete, keep it; otherwise i stays
+     * at its first byte and everything from i on is held back */
+    char_len = get_utf8_char_len(readbuf[i]);
+    if(char_len+i <= pos)
+        i += char_len;
+
+    if(fdinfo->wxflag & WX_PIPE) {
+        if(i < pos)
+            fdinfo->lookahead[0] = readbuf[i];
+        if(i+1 < pos)
+            fdinfo->lookahead[1] = readbuf[i+1];
+        if(i+2 < pos)
+            fdinfo->lookahead[2] = readbuf[i+2];
+    }else if(i < pos) {
+        SetFilePointer(fdinfo->handle, i-pos, NULL, FILE_CURRENT);
+    }
+    pos = i;
+
+    /* compact readbuf in place: i is the read index, j the write index */
+    for(i=0, j=0; i<pos; i++) {
+        if(readbuf[i] == 0x1a) {
+            fdinfo->wxflag |= WX_ATEOF;
+            break;
+        }
+
+        /* strip '\r' if followed by '\n' */
+        if(readbuf[i] == '\r' && i+1==pos) {
+            /* '\r' is the last byte in the buffer: the following byte
+             * has to come from the handle */
+            if(fdinfo->lookahead[0] != '\n' || !ReadFile(hand, &lookahead, 1, &num_read, NULL) || !num_read) {
+                readbuf[j++] = '\r';
+            }else if(lookahead == '\n' && j==0) {
+                readbuf[j++] = '\n';
+            }else {
+                if(lookahead != '\n')
+                    readbuf[j++] = '\r';
+
+                /* give the peeked byte back */
+                if(fdinfo->wxflag & WX_PIPE)
+                    fdinfo->lookahead[0] = lookahead;
+                else
+                    SetFilePointer(fdinfo->handle, -1, NULL, FILE_CURRENT);
+            }
+        }else if(readbuf[i]!='\r' || readbuf[i+1]!='\n') {
+            readbuf[j++] = readbuf[i];
+        }
+    }
+    pos = j;
+
+    if(!(num_read = MultiByteToWideChar(CP_UTF8, 0, readbuf, pos, buf, count))) {
+        /* record the error before freeing the buffer */
+        msvcrt_set_errno(GetLastError());
+        if(readbuf != min_buf) MSVCRT_free(readbuf);
+        return -1;
+    }
+
+    if(readbuf != min_buf) MSVCRT_free(readbuf);
+    return num_read*2;
+}
+
 /*********************************************************************
  *		(internal) read_i
  *
@@ -2140,6 +2319,9 @@ static int read_i(int fd, void *buf, unsigned int count)
         return -1;
     }
+ if((fdinfo->wxflag&WX_TEXT) && (fdinfo->exflag&EF_UTF8)) + return read_utf8(fd, buf, count); + if (fdinfo->lookahead[0]!='\n' || ReadFile(hand, bufstart, count, &num_read, NULL)) { if (fdinfo->lookahead[0] != '\n')