Shachar Shemesh wrote:
- used direct syscalls as my libc doesn't have epoll_* functions
Which Alexandre already asked not to. In any case, if you do, you have to add a copyright notice to the authors of the header file who's content you are copying.
OK, fixed.
- fixed array of epoll_event structures for starters
I missed that. What did you mean by this?
I mean I've allocated an array of 10 struct epoll_events to receive events from epoll, and I plan to dynamically allocate it later.
- do_epoll_remove assert commented as it occasionally triggers
Was that assert in my code? By looking at it, I see no reason for it NOT to be triggered occasionally. Don't forget that the file descriptor may have been closed prior to being removed from the list, in which case the system removed it from the epoll fd itself.
I've tried to make it so that I only add FDs that are known not to be epoll'ed already, and remove FDs that are. To verify that is correct, I am using asserts.
If the file descriptor is already in there, EPOLL_CTL_ADD will fail. You will then have to call EPOLL_CTL_MOD.
The patch from the previous mail always removed the FD before adding it instead of trying to modify the poll flags.
I'm assuming you pulled the timeout code into a separate function here. If there is anything fd related here, please let me know.
Correct.
If you really have to, make that: #if POLLIN!=EPOLLIN || ....
Fixed in a different way.
Read the comment in the appropriate section of my code. You have a race here that will crash you occasionally.
Fixed.
+void main_loop(void) +{ +#ifdef USE_EPOLL
- if( epoll_fd >= 0 )
- {
fprintf(stderr,"epoll enabled\n");
epoll_loop();
close( epoll_fd );
return;
- }
+#endif
- select_loop();
You are calling both epoll AND poll here.
No, there's a return in there, so only one of the main loops is entered.
Like I said earlier, EPOLL_CTL_MOD does the same thing.
The new patch uses EPOLL_CTL_MOD.
Well, the most serious problem with this code is the race you did not handle. I don't like the use of a whole pollfd array, but that's not for me to decide.
OK, I've fixed the race as I mentioned above. I'm trying to use exactly the same data structures as the select() loop where possible so that it's easier to verify the epoll_loop works assuming that the select_loop() works.
Looking forward to comments on the attached patch.
Mike
Note: libepoll is available here: http://www.xmailserver.org/linux-patches/nio-improve.html
Index: server/fd.c =================================================================== RCS file: /home/wine/wine/server/fd.c,v retrieving revision 1.25 diff -u -r1.25 fd.c --- server/fd.c 9 Sep 2004 00:28:36 -0000 1.25 +++ server/fd.c 9 Sep 2004 04:51:16 -0000 @@ -2,6 +2,8 @@ * Server-side file descriptor management * * Copyright (C) 2000, 2003 Alexandre Julliard + * Copyright (C) 2004 Shachar Shemesh for Lingnu Open Source Consulting ltd. + * Copyright (C) 2004 Mike McCormack * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -33,6 +35,10 @@ #ifdef HAVE_SYS_POLL_H #include <sys/poll.h> #endif +#include <stdint.h> +#ifdef HAVE_SYS_EPOLL_H +#include <sys/epoll.h> +#endif #include <sys/stat.h> #include <sys/time.h> #include <sys/types.h> @@ -236,6 +242,106 @@ static int allocated_users; /* count of allocated entries in the array */ static struct fd **freelist; /* list of free entries in the array */
+#ifdef HAVE_SYS_EPOLL_H + +#define EPOLL_SIZE 1024 +#define MAX_EVENTS 10 + +static int epoll_fd; +struct epoll_event *epoll_ev_first, *epoll_ev_last; + +/* this should optimize away if the poll constants are the same as the epoll ones */ +static inline int epoll_events( int events ) +{ + int r = 0; + + if( (POLLIN == EPOLLIN) && (POLLOUT == EPOLLOUT) && + (POLLERR == EPOLLERR) && (POLLHUP == EPOLLHUP) ) + return events; + if( POLLIN&events ) r |= EPOLLIN; + if( POLLOUT&events ) r |= EPOLLOUT; + if( POLLERR&events ) r |= EPOLLERR; + if( POLLHUP&events ) r |= EPOLLHUP; + return r; +} + +static inline void do_epoll_add( int fd, int events, unsigned int user ) +{ + struct epoll_event eev; + int r; + + if (epoll_fd < 0) + return; + if (fd < 0) + return; + if (!events) + return; + eev.events = events; + eev.data.u32 = user; + r = epoll_ctl( epoll_fd, EPOLL_CTL_ADD, fd, &eev ); + assert( 0 == r ); +} + +static inline void do_epoll_mod( int fd, int events, unsigned int user ) +{ + struct epoll_event eev; + int r; + + if (epoll_fd < 0) + return; + if (fd < 0) + return; + eev.events = events; + eev.data.u32 = user; + r = epoll_ctl( epoll_fd, EPOLL_CTL_MOD, fd, &eev ); + assert( 0 == r ); +} + +static inline void do_epoll_remove( int fd, int old_events, unsigned int user ) +{ + struct epoll_event eev, *i; + int r; + + if (epoll_fd < 0) + return; + if (fd < 0) + return; + if (!old_events) + return; + r = epoll_ctl( epoll_fd, EPOLL_CTL_DEL, fd, &eev ); + assert( 0 == r ); + + /* remove any unprocessed events for this fd */ + for( i=epoll_ev_first; i<epoll_ev_last; i++ ) + if (i->data.u32 == user) + i->events = 0; +} + +void init_fd() +{ + epoll_fd = epoll_create(EPOLL_SIZE); +} + +#else + +static inline void do_epoll_add( int fd, int events, unsigned int user ) +{ +} + +static inline void do_epoll_mod( int fd, int events, unsigned int user ) +{ +} + +static inline void do_epoll_remove( int fd, int old_events, unsigned int user ) +{ +} + +void init_fd() +{ +} + +#endif + /* add a user in the poll array and return its index, or -1 on failure */ static int add_poll_user( struct fd *fd ) { @@ -280,6 +386,7 @@ { assert( user >= 0 ); assert( poll_users[user] == fd ); + do_epoll_remove( pollfd[user].fd, pollfd[user].events, user ); pollfd[user].fd = -1; pollfd[user].events = 0; pollfd[user].revents = 0; @@ -288,56 +395,95 @@ active_users--; }
- -/* server main poll() loop */ -void main_loop(void) +static inline int next_delta() { - int ret; + long diff = -1; + struct list expired_list, *ptr; + struct timeval now;
- while (active_users) + if (list_empty(&timeout_list)) + return diff; + + gettimeofday( &now, NULL ); + + /* first remove all expired timers from the list */ + + list_init( &expired_list ); + while ((ptr = list_head( &timeout_list )) != NULL) { - long diff = -1; - if (!list_empty( &timeout_list )) + struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry ); + + if (!time_before( &now, &timeout->when )) { - struct list expired_list, *ptr; - struct timeval now; - gettimeofday( &now, NULL ); + list_remove( &timeout->entry ); + list_add_tail( &expired_list, &timeout->entry ); + } + else break; + }
- /* first remove all expired timers from the list */ + /* now call the callback for all the removed timers */
- list_init( &expired_list ); - while ((ptr = list_head( &timeout_list )) != NULL) - { - struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry ); + while ((ptr = list_head( &expired_list )) != NULL) + { + struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry ); + list_remove( &timeout->entry ); + timeout->callback( timeout->private ); + free( timeout ); + }
- if (!time_before( &now, &timeout->when )) - { - list_remove( &timeout->entry ); - list_add_tail( &expired_list, &timeout->entry ); - } - else break; - } + if (!active_users) return diff; /* last user removed by a timeout */
- /* now call the callback for all the removed timers */ + if ((ptr = list_head( &timeout_list )) != NULL) + { + struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry ); + diff = (timeout->when.tv_sec - now.tv_sec) * 1000 + + (timeout->when.tv_usec - now.tv_usec) / 1000; + if (diff < 0) diff = 0; + }
- while ((ptr = list_head( &expired_list )) != NULL) - { - struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry ); - list_remove( &timeout->entry ); - timeout->callback( timeout->private ); - free( timeout ); - } + return diff; +}
- if (!active_users) break; /* last user removed by a timeout */ +#ifdef HAVE_SYS_EPOLL_H
- if ((ptr = list_head( &timeout_list )) != NULL) +/* server main poll() loop using epoll */ +static void epoll_loop(void) +{ + struct epoll_event ev[MAX_EVENTS]; + + while (active_users) + { + int ret, i, user, diff = next_delta(); + if (!active_users) break; /* last user removed by a timeout */ + + ret = epoll_wait( epoll_fd, &ev[0], MAX_EVENTS, diff ); + for (i = 0; i < ret; i++) + { + user = ev[i].data.u32; + assert( user < nb_users ); + epoll_ev_last = &ev[ret]; + if (ev[i].events) { - struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry ); - diff = (timeout->when.tv_sec - now.tv_sec) * 1000 - + (timeout->when.tv_usec - now.tv_usec) / 1000; - if (diff < 0) diff = 0; + epoll_ev_first = &ev[i+1]; + pollfd[user].revents = ev[i].events; + fd_poll_event( poll_users[user], ev[i].events ); } } + epoll_ev_first = NULL; + epoll_ev_last = NULL; + } +} +#endif + +/* server main poll() loop using select */ +static void select_loop(void) +{ + int ret; + + while (active_users) + { + int diff = next_delta(); + if (!active_users) break; /* last user removed by a timeout */
ret = poll( pollfd, nb_users, diff ); if (ret > 0) @@ -355,6 +501,18 @@ } }
+void main_loop(void) +{ +#ifdef HAVE_SYS_EPOLL_H + if( epoll_fd >= 0 ) + { + epoll_loop(); + close( epoll_fd ); + return; + } +#endif + select_loop(); +}
/****************************************************************/ /* inode functions */ @@ -837,12 +995,24 @@ assert( poll_users[user] == fd ); if (events == -1) /* stop waiting on this fd completely */ { + do_epoll_remove( pollfd[user].fd, pollfd[user].events, user ); pollfd[user].fd = -1; pollfd[user].events = POLLERR; pollfd[user].revents = 0; } else if (pollfd[user].fd != -1 || !pollfd[user].events) { + /* remove the old fd */ + if ( (fd->unix_fd != pollfd[user].fd) || !events ) + do_epoll_remove( pollfd[user].fd, pollfd[user].events, user ); + if (events) + { + /* add the new fd or update the old one */ + if ( (pollfd[user].fd == -1) || !pollfd[user].events ) + do_epoll_add( fd->unix_fd, events, user ); + else if (pollfd[user].events != events) + do_epoll_mod( fd->unix_fd, events, user ); + } pollfd[user].fd = fd->unix_fd; pollfd[user].events = events; } Index: server/main.c =================================================================== RCS file: /home/wine/wine/server/main.c,v retrieving revision 1.31 diff -u -r1.31 main.c --- server/main.c 26 Mar 2003 01:32:18 -0000 1.31 +++ server/main.c 9 Sep 2004 04:51:17 -0000 @@ -110,6 +110,7 @@ exit(1); /* make sure atexit functions get called */ }
+void init_fd(); int main( int argc, char *argv[] ) { parse_args( argc, argv ); @@ -122,6 +123,7 @@ signal( SIGTERM, sigterm_handler ); signal( SIGABRT, sigterm_handler );
+ init_fd(); sock_init(); open_master_socket(); sync_namespace = create_namespace( 37, TRUE ); Index: server/Makefile.in =================================================================== RCS file: /home/wine/wine/server/Makefile.in,v retrieving revision 1.51 diff -u -r1.51 Makefile.in --- server/Makefile.in 23 Jun 2004 20:44:58 -0000 1.51 +++ server/Makefile.in 9 Sep 2004 04:51:17 -0000 @@ -5,6 +5,8 @@ VPATH = @srcdir@ MODULE = none
+LIBEPOLL = @LIBEPOLL@ + C_SRCS = \ async.c \ atom.c \ @@ -52,7 +54,7 @@ @MAKE_RULES@
wineserver: $(OBJS) - $(CC) -o $(PROGRAMS) $(OBJS) $(LIBWINE) $(LIBUNICODE) $(LIBPORT) $(LDFLAGS) $(LIBS) + $(CC) -o $(PROGRAMS) $(OBJS) $(LIBWINE) $(LIBUNICODE) $(LIBPORT) $(LDFLAGS) $(LIBS) $(LIBEPOLL)
install:: $(PROGRAMS) $(MKINSTALLDIRS) $(bindir) Index: configure.ac =================================================================== RCS file: /home/wine/wine/configure.ac,v retrieving revision 1.307 diff -u -r1.307 configure.ac --- configure.ac 7 Sep 2004 22:44:34 -0000 1.307 +++ configure.ac 9 Sep 2004 04:51:17 -0000 @@ -132,6 +132,8 @@ AC_CHECK_LIB(xpg4,_xpg4_setrunelocale) dnl Check for -lpoll for Mac OS X/Darwin AC_CHECK_LIB(poll,poll) +dnl Check for -lepoll +AC_CHECK_LIB(epoll,epoll_create,AC_SUBST(LIBEPOLL,-lepoll)) dnl Check for -lresolv for Mac OS X/Darwin AC_CHECK_LIB(resolv,res_9_init) dnl Check for -lpthread @@ -757,6 +759,7 @@ EXTRACFLAGS="$EXTRACFLAGS -Wpointer-arith" fi fi +EXTRACFLAGS="$EXTRACFLAGS -Wsign-compare"
dnl **** Check how to define a function in assembly code ****
@@ -1046,6 +1049,7 @@ _vsnprintf \ chsize \ clone \ + epoll_create \ finite \ fpclass \ fstatfs \ @@ -1159,6 +1163,7 @@ sys/msg.h \ sys/param.h \ sys/poll.h \ + sys/epoll.h \ sys/ptrace.h \ sys/reg.h \ sys/scsiio.h \ @@ -1629,6 +1634,7 @@ dlls/opengl32/Makefile dlls/psapi/Makefile dlls/psapi/tests/Makefile +dlls/pstorec/Makefile dlls/qcap/Makefile dlls/quartz/Makefile dlls/quartz/tests/Makefile Index: include/config.h.in =================================================================== RCS file: /home/wine/wine/include/config.h.in,v retrieving revision 1.195 diff -u -r1.195 config.h.in --- include/config.h.in 7 Sep 2004 22:44:34 -0000 1.195 +++ include/config.h.in 9 Sep 2004 04:51:17 -0000 @@ -80,6 +80,9 @@ /* Define to 1 if you have the <elf.h> header file. */ #undef HAVE_ELF_H
+/* Define to 1 if you have the `epoll_create' function. */ +#undef HAVE_EPOLL_CREATE + /* Define to 1 if you have the `finite' function. */ #undef HAVE_FINITE
@@ -248,6 +251,9 @@ /* Define if you have the curses library (-lcurses) */ #undef HAVE_LIBCURSES
+/* Define to 1 if you have the `epoll' library (-lepoll). */ +#undef HAVE_LIBEPOLL + /* Define to 1 if you have the `i386' library (-li386). */ #undef HAVE_LIBI386
@@ -613,6 +619,9 @@
/* Define to 1 if you have the <sys/elf32.h> header file. */ #undef HAVE_SYS_ELF32_H + +/* Define to 1 if you have the <sys/epoll.h> header file. */ +#undef HAVE_SYS_EPOLL_H
/* Define to 1 if you have the <sys/errno.h> header file. */ #undef HAVE_SYS_ERRNO_H