From: Simon McVittie smcv@collabora.com
As per https://github.com/systemd/systemd/blob/v247-rc1/NEWS#L5 there are more kernel uevent types than just "add" and "remove", and we should be treating everything other than "remove" as being potentially an "add".
To cope with this, try_add_device() now checks whether the same device was already added. If so, we ignore it.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com ---
Two patches from Proton, which should help the udev backend when running Wine within a container. There was some container detection too but that seemed a bit specific, and the new option should be enough.
dlls/winebus.sys/bus_udev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/dlls/winebus.sys/bus_udev.c b/dlls/winebus.sys/bus_udev.c index 1cad63f0275..59bb2564def 100644 --- a/dlls/winebus.sys/bus_udev.c +++ b/dlls/winebus.sys/bus_udev.c @@ -1411,16 +1411,14 @@ static void process_monitor_event(struct udev_monitor *monitor)
if (!action) WARN("No action received\n"); - else if (strcmp(action, "add") == 0) + else if (strcmp(action, "remove")) udev_add_device(dev); - else if (strcmp(action, "remove") == 0) + else { impl = find_device_from_udev(dev); if (impl) bus_event_queue_device_removed(&event_queue, &impl->unix_device); else WARN("failed to find device for udev device %p\n", dev); } - else - WARN("Unhandled action %s\n", debugstr_a(action));
udev_device_unref(dev); }
From: Simon McVittie smcv@collabora.com
In a container with a non-trivial user namespace, we cannot rely on libudev communicating with udevd as a way to monitor device nodes, for the following reasons:
* If uid 0 from the host is not mapped to uid 0 in the container, libudev cannot authenticate netlink messages from the host, because their sender uid appears to be the overflowuid. Resolving this by mapping uid 0 into the container is not allowed when creating user namespaces as an unprivileged user, and even when running as a privileged user, it might be desirable for the real uid 0 to not be mapped as a way to harden the security boundary between container and host.
* Depending on the container configuration, initial enumeration might not be able to read /run/udev from the host system. If it can't, sysfs attributes will still work because those are read directly from the kernel via sysfs, but udev properties coming from user-space rules (in particular ID_INPUT_JOYSTICK and friends) will appear to be missing.
* The protocols between udevd and libudev (netlink messages for monitoring, and /run/udev for initial enumeration) are considered to be private to a particular version of udev, and are not a stable API; but in a container, we cannot expect that our copy of libudev is at exactly the same version as udevd on the host system.
Sidestep this by adding a code path that continues to use libudev for the parts that work regardless of whether udevd is running or can be communicated with.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/winebus.sys/bus_udev.c | 271 ++++++++++++++++++++++++++++++++++-- dlls/winebus.sys/main.c | 2 + dlls/winebus.sys/unixlib.h | 1 + 3 files changed, 263 insertions(+), 11 deletions(-)
diff --git a/dlls/winebus.sys/bus_udev.c b/dlls/winebus.sys/bus_udev.c index 59bb2564def..5d479a72fd2 100644 --- a/dlls/winebus.sys/bus_udev.c +++ b/dlls/winebus.sys/bus_udev.c @@ -29,6 +29,8 @@ #include <stdlib.h> #include <stdio.h> #include <stdint.h> +#include <sys/types.h> +#include <dirent.h> #ifdef HAVE_UNISTD_H # include <unistd.h> #endif @@ -47,6 +49,9 @@ #ifdef HAVE_SYS_IOCTL_H # include <sys/ioctl.h> #endif +#ifdef HAVE_SYS_INOTIFY_H +# include <sys/inotify.h> +#endif
#ifdef HAVE_LINUX_INPUT_H # include <linux/input.h> @@ -112,6 +117,7 @@ struct base_device void (*read_report)(struct unix_device *iface);
struct udev_device *udev_device; + char devnode[MAX_PATH]; int device_fd; };
@@ -1195,7 +1201,7 @@ static void hidraw_set_quirks(struct hidraw_device *impl, DWORD bus_type, WORD v impl->quirks |= QUIRK_DS4_BT; }
-static void udev_add_device(struct udev_device *dev) +static void udev_add_device(struct udev_device *dev, int fd) { struct device_desc desc = { @@ -1204,12 +1210,15 @@ static void udev_add_device(struct udev_device *dev) struct base_device *impl; const char *subsystem; const char *devnode; - int fd, bus = 0; + int bus = 0;
if (!(devnode = udev_device_get_devnode(dev))) + { + if (fd >= 0) close(fd); return; + }
- if ((fd = open(devnode, O_RDWR)) == -1) + if (fd < 0 && (fd = open(devnode, O_RDWR)) == -1) { WARN("Unable to open udev device %s: %s\n", debugstr_a(devnode), strerror(errno)); return; @@ -1284,6 +1293,7 @@ static void udev_add_device(struct udev_device *dev) list_add_tail(&device_list, &impl->unix_device.entry); impl->read_report = hidraw_device_read_report; impl->udev_device = udev_device_ref(dev); + strcpy(impl->devnode, devnode); impl->device_fd = fd; hidraw_set_quirks((struct hidraw_device *)impl, bus, desc.vid, desc.pid);
@@ -1296,6 +1306,7 @@ static void udev_add_device(struct udev_device *dev) list_add_tail(&device_list, &impl->unix_device.entry); impl->read_report = lnxev_device_read_report; impl->udev_device = udev_device_ref(dev); + strcpy(impl->devnode, devnode); impl->device_fd = fd;
bus_event_queue_device_created(&event_queue, &impl->unix_device, &desc); @@ -1303,7 +1314,228 @@ static void udev_add_device(struct udev_device *dev) #endif }
-static void build_initial_deviceset(void) +/* inotify watch descriptors for create_monitor_direct() */ +#ifdef HAVE_SYS_INOTIFY_H +static int dev_watch = -1; +static int devinput_watch = -1; + +static void maybe_add_devnode(const char *base, const char *dir, const char *subsystem) +{ + char *syspath = NULL, devnode[MAX_PATH], syslink[MAX_PATH]; + struct udev_device *dev = NULL; + const char *udev_devnode; + int fd = -1; + + TRACE("Considering %s/%s...\n", dir, base); + + snprintf(devnode, sizeof(devnode), "%s/%s", dir, base); + if ((fd = open(devnode, O_RDWR)) < 0) + { + /* When using inotify monitoring, quietly ignore device nodes that we cannot read, + * without emitting a warning. + * + * We can expect that a significant number of device nodes will be permanently + * unreadable, such as the device nodes for keyboards and mice. We can also expect + * that joysticks and game controllers will be temporarily unreadable until udevd + * chmods them; we'll get another chance to open them when their attributes change. */ + TRACE("Unable to open %s, ignoring: %s\n", debugstr_a(devnode), strerror(errno)); + return; + } + + snprintf(syslink, sizeof(syslink), "/sys/class/%s/%s", subsystem, base); + TRACE("Resolving real path to %s\n", debugstr_a(syslink)); + + if (!(syspath = realpath(syslink, NULL))) + { + WARN("Unable to resolve path "%s" for "%s/%s": %s\n", + debugstr_a(syslink), dir, base, strerror(errno)); + goto error; + } + + TRACE("Creating udev_device for %s\n", syspath); + if (!(dev = udev_device_new_from_syspath(udev_context, syspath))) + { + WARN("failed to create udev device from syspath %s\n", syspath); + goto error; + } + + if (!(udev_devnode = udev_device_get_devnode(dev)) || strcmp(devnode, udev_devnode) != 0) + { + WARN("Tried to get udev device for "%s" but device node of "%s" -> "%s" is "%s"\n", + debugstr_a(devnode), debugstr_a(syslink), debugstr_a(syspath), debugstr_a(udev_devnode)); + goto error; + } + + TRACE("Adding device for %s\n", syspath); + udev_add_device(dev, fd); + udev_device_unref(dev); + return; + +error: + if (dev) udev_device_unref(dev); + free(syspath); + close(fd); +} + +static void build_initial_deviceset_direct(void) +{ + struct dirent *dent; + int n, len; + DIR *dir; + + if (!options.disable_hidraw) + { + TRACE("Initial enumeration of /dev/hidraw*\n"); + if (!(dir = opendir("/dev"))) WARN("Unable to open /dev: %s\n", strerror(errno)); + else + { + for (dent = readdir(dir); dent; dent = readdir(dir)) + { + if (sscanf(dent->d_name, "hidraw%u%n", &n, &len) != 1 || len != strlen(dent->d_name)) + WARN("ignoring %s, name doesn't match hidraw%%u\n", debugstr_a(dent->d_name)); + else + maybe_add_devnode(dent->d_name, "/dev", "hidraw"); + } + closedir(dir); + } + } +#ifdef HAS_PROPER_INPUT_HEADER + if (!options.disable_input) + { + TRACE("Initial enumeration of /dev/input/event*\n"); + if (!(dir = opendir("/dev/input"))) WARN("Unable to open /dev/input: %s\n", strerror(errno)); + else + { + for (dent = readdir(dir); dent; dent = readdir(dir)) + { + if (sscanf(dent->d_name, "event%u%n", &n, &len) != 1 || len != strlen(dent->d_name)) + WARN("ignoring %s, name doesn't match event%%u\n", debugstr_a(dent->d_name)); + else + maybe_add_devnode(dent->d_name, "/dev/input", "input"); + } + closedir(dir); + } + } +#endif +} + +static int create_inotify(void) +{ + int systems = 0, fd, flags = IN_CREATE | IN_DELETE | IN_MOVE | IN_ATTRIB; + + if ((fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC)) < 0) + { + WARN("Unable to get inotify fd\n"); + return fd; + } + + if (!options.disable_hidraw) + { + /* We need to watch for attribute changes in addition to + * creation, because when a device is first created, it has + * permissions that we can't read. When udev chmods it to + * something that we maybe *can* read, we'll get an + * IN_ATTRIB event to tell us. */ + dev_watch = inotify_add_watch(fd, "/dev", flags); + if (dev_watch < 0) WARN("Unable to initialize inotify for /dev: %s\n", strerror(errno)); + else systems++; + } +#ifdef HAS_PROPER_INPUT_HEADER + if (!options.disable_input) + { + devinput_watch = inotify_add_watch(fd, "/dev/input", flags); + if (devinput_watch < 0) WARN("Unable to initialize inotify for /dev/input: %s\n", strerror(errno)); + else systems++; + } +#endif + if (systems == 0) + { + WARN("No subsystems added to monitor\n"); + close(fd); + return -1; + } + + return fd; +} + +static struct base_device *find_device_from_devnode(const char *path) +{ + struct base_device *impl; + + LIST_FOR_EACH_ENTRY(impl, &device_list, struct base_device, unix_device.entry) + if (!strcmp(impl->devnode, path)) return impl; + + return NULL; +} + +static void maybe_remove_devnode(const char *base, const char *dir) +{ + struct base_device *impl; + char devnode[MAX_PATH]; + + snprintf(devnode, sizeof(devnode), "%s/%s", dir, base); + impl = find_device_from_devnode(devnode); + if (impl) bus_event_queue_device_removed(&event_queue, &impl->unix_device); + else WARN("failed to find device for path %s\n", devnode); +} + +static void process_inotify_event(int fd) +{ + union + { + struct inotify_event event; + char storage[4096]; + char enough_for_inotify[sizeof(struct inotify_event) + NAME_MAX + 1]; + } buf; + ssize_t bytes; + int n, len; + + if ((bytes = read(fd, &buf, sizeof(buf))) < 0) + WARN("read failed: %u %s\n", errno, strerror(errno)); + else while (bytes > 0) + { + if (buf.event.len > 0) + { + if (buf.event.wd == dev_watch) + { + if (sscanf(buf.event.name, "hidraw%u%n", &n, &len) != 1 || len != strlen(buf.event.name)) + WARN("ignoring %s, name doesn't match hidraw%%u\n", debugstr_a(buf.event.name)); + else if (buf.event.mask & (IN_DELETE | IN_MOVED_FROM)) + maybe_remove_devnode(buf.event.name, "/dev"); + else if (buf.event.mask & (IN_CREATE | IN_MOVED_TO)) + maybe_add_devnode(buf.event.name, "/dev", "hidraw"); + else if (buf.event.mask & IN_ATTRIB) + { + maybe_remove_devnode(buf.event.name, "/dev"); + maybe_add_devnode(buf.event.name, "/dev", "hidraw"); + } + } +#ifdef HAS_PROPER_INPUT_HEADER + else if (buf.event.wd == devinput_watch) + { + if (sscanf(buf.event.name, "event%u%n", &n, &len) != 1 || len != strlen(buf.event.name)) + WARN("ignoring %s, name doesn't match event%%u\n", debugstr_a(buf.event.name)); + else if (buf.event.mask & (IN_DELETE | IN_MOVED_FROM)) + maybe_remove_devnode(buf.event.name, "/dev/input"); + else if (buf.event.mask & (IN_CREATE | IN_MOVED_TO)) + maybe_add_devnode(buf.event.name, "/dev/input", "input"); + else if (buf.event.mask & IN_ATTRIB) + { + maybe_remove_devnode(buf.event.name, "/dev/input"); + maybe_add_devnode(buf.event.name, "/dev/input", "input"); + } + } +#endif + } + + len = sizeof(struct inotify_event) + buf.event.len; + bytes -= len; + if (bytes > 0) memmove(&buf.storage[0], &buf.storage[len], bytes); + } +} +#endif /* HAVE_SYS_INOTIFY_H */ + +static void build_initial_deviceset_udevd(void) { struct udev_enumerate *enumerate; struct udev_list_entry *devices, *dev_list_entry; @@ -1338,7 +1570,7 @@ static void build_initial_deviceset(void) path = udev_list_entry_get_name(dev_list_entry); if ((dev = udev_device_new_from_syspath(udev_context, path))) { - udev_add_device(dev); + udev_add_device(dev, -1); udev_device_unref(dev); } } @@ -1412,7 +1644,7 @@ static void process_monitor_event(struct udev_monitor *monitor) if (!action) WARN("No action received\n"); else if (strcmp(action, "remove")) - udev_add_device(dev); + udev_add_device(dev, -1); else { impl = find_device_from_udev(dev); @@ -1425,7 +1657,7 @@ static void process_monitor_event(struct udev_monitor *monitor)
NTSTATUS udev_bus_init(void *args) { - int monitor_fd; + int monitor_fd = -1;
TRACE("args %p\n", args);
@@ -1443,12 +1675,22 @@ NTSTATUS udev_bus_init(void *args) goto error; }
- if (!(udev_monitor = create_monitor(&monitor_fd))) +#if HAVE_SYS_INOTIFY_H + if (options.disable_udevd) monitor_fd = create_inotify(); + if (monitor_fd < 0) options.disable_udevd = FALSE; +#else + if (options.disable_udevd) ERR("inotify support not compiled in!\n"); + options.disable_udevd = FALSE; +#endif + + if (monitor_fd < 0 && !(udev_monitor = create_monitor(&monitor_fd))) { ERR("UDEV monitor creation failed\n"); goto error; }
+ if (monitor_fd < 0) goto error; + poll_fds[0].fd = monitor_fd; poll_fds[0].events = POLLIN; poll_fds[0].revents = 0; @@ -1457,10 +1699,13 @@ NTSTATUS udev_bus_init(void *args) poll_fds[1].revents = 0; poll_count = 2;
- build_initial_deviceset(); + if (options.disable_udevd) build_initial_deviceset_direct(); + else build_initial_deviceset_udevd(); + return STATUS_SUCCESS;
error: + if (udev_monitor) udev_monitor_unref(udev_monitor); if (udev_context) udev_unref(udev_context); udev_context = NULL; close(deviceloop_control[0]); @@ -1493,7 +1738,11 @@ NTSTATUS udev_bus_wait(void *args) while (poll(pfd, count, -1) <= 0) {}
pthread_mutex_lock(&udev_cs); - if (pfd[0].revents) process_monitor_event(udev_monitor); + if (pfd[0].revents) + { + if (udev_monitor) process_monitor_event(udev_monitor); + else process_inotify_event(pfd[0].fd); + } if (pfd[1].revents) read(deviceloop_control[0], &ctrl, 1); for (i = 2; i < count; ++i) { @@ -1506,7 +1755,7 @@ NTSTATUS udev_bus_wait(void *args)
TRACE("UDEV main loop exiting\n"); bus_event_queue_destroy(&event_queue); - udev_monitor_unref(udev_monitor); + if (udev_monitor) udev_monitor_unref(udev_monitor); udev_unref(udev_context); udev_context = NULL; close(deviceloop_control[0]); diff --git a/dlls/winebus.sys/main.c b/dlls/winebus.sys/main.c index a98537991f5..73febf5cd06 100644 --- a/dlls/winebus.sys/main.c +++ b/dlls/winebus.sys/main.c @@ -736,6 +736,8 @@ static NTSTATUS udev_driver_init(void) if (bus_options.disable_hidraw) TRACE("UDEV hidraw devices disabled in registry\n"); bus_options.disable_input = check_bus_option(L"DisableInput", 0); if (bus_options.disable_input) TRACE("UDEV input devices disabled in registry\n"); + bus_options.disable_udevd = check_bus_option(L"DisableUdevd", 0); + if (bus_options.disable_udevd) TRACE("UDEV udevd use disabled in registry\n");
return bus_main_thread_start(&bus); } diff --git a/dlls/winebus.sys/unixlib.h b/dlls/winebus.sys/unixlib.h index e3996838c61..11a4efac36e 100644 --- a/dlls/winebus.sys/unixlib.h +++ b/dlls/winebus.sys/unixlib.h @@ -56,6 +56,7 @@ struct udev_bus_options { BOOL disable_hidraw; BOOL disable_input; + BOOL disable_udevd; };
struct iohid_bus_options
On Wed, 20 Oct 2021 at 11:01:32 +0200, Rémi Bernon wrote:
There was some container detection too but that seemed a bit specific, and the new option should be enough.
(I am not a Wine or Proton developer and not subscribed to the list, so please cc me if appropriate.)
I think some version of the container detection might be valuable to have as a follow-up to this: because udev is not designed to work across container boundaries, it seems like it would be good to have Wine automatically "do the right thing" when running inside an unprivileged container like Flatpak or Steam's pressure-vessel.
The version of the container detection that I contributed to Proton about a year ago was specific to those two container managers:
- bypass_udevd = check_bus_option(&disable_udevd, 0); + if (access ("/run/pressure-vessel", R_OK) + || access ("/.flatpak-info", R_OK)) + { + TRACE("Container detected, bypassing udevd by default\n"); + bypass_udevd = 1; + } + + bypass_udevd = check_bus_option(&disable_udevd, bypass_udevd);
but unprivileged container managers have moved on a bit, and there is now a specification (originating in systemd[1]) for how a container manager can announce to contained code that it is running in a container: if the file /run/host/container-manager exists, then we're in a container (and the file should contain a string like "flatpak\n" or "pressure-vessel\n"). So replacing the access() checks above with access("/run/host/container-manager", R_OK) might be better.
For Flatpak specifically, /run/host/container-manager was added in 1.9.1, so if you want to support older Flatpak versions like the 1.8.x found in RHEL 8, the right thing would now be something like this (untested), which is the equivalent of how this is now done[3] in SDL:
- bypass_udevd = check_bus_option(&disable_udevd, 0); + if (access ("/run/host/container-manager", R_OK) + || access ("/.flatpak-info", R_OK)) + { + TRACE("Container detected, bypassing udevd by default\n"); + bypass_udevd = 1; + } + + bypass_udevd = check_bus_option(&disable_udevd, bypass_udevd);
smcv
[1] https://systemd.io/CONTAINER_INTERFACE/ [2] https://github.com/containers/podman/issues/3586 [3] https://github.com/libsdl-org/SDL/blob/release-2.0.16/src/joystick/linux/SDL...
On 10/20/21 2:51 PM, Simon McVittie wrote:
On Wed, 20 Oct 2021 at 11:01:32 +0200, Rémi Bernon wrote:
There was some container detection too but that seemed a bit specific, and the new option should be enough.
(I am not a Wine or Proton developer and not subscribed to the list, so please cc me if appropriate.)
I think some version of the container detection might be valuable to have as a follow-up to this: because udev is not designed to work across container boundaries, it seems like it would be good to have Wine automatically "do the right thing" when running inside an unprivileged container like Flatpak or Steam's pressure-vessel.
The version of the container detection that I contributed to Proton about a year ago was specific to those two container managers:
- bypass_udevd = check_bus_option(&disable_udevd, 0);
- if (access ("/run/pressure-vessel", R_OK)
|| access ("/.flatpak-info", R_OK))
- {
TRACE("Container detected, bypassing udevd by default\n");
bypass_udevd = 1;
- }
- bypass_udevd = check_bus_option(&disable_udevd, bypass_udevd);
but unprivileged container managers have moved on a bit, and there is now a specification (originating in systemd[1]) for how a container manager can announce to contained code that it is running in a container: if the file /run/host/container-manager exists, then we're in a container (and the file should contain a string like "flatpak\n" or "pressure-vessel\n"). So replacing the access() checks above with access("/run/host/container-manager", R_OK) might be better.
Thanks, good to know! If it's now part of some standard I'll probably consider adding it.