\documentclass{beamer} \usepackage[normalem]{ulem} \usepackage{hyperref} \usepackage{qrcode} \usepackage{tabularx} \newcommand{\BSD}{BSD} \newcommand{\EuroBSDcon}{EuroBSDcon} \newcommand{\NetBSD}{NetBSD} \newcommand{\NetBSDcurrent}{\NetBSD-current} \newcommand{\USB}{USB} \title{How I learned to stop worrying and yank the \USB} \author{Taylor R Campbell \\ \texttt{riastradh@NetBSD.org}} \date{\EuroBSDcon\ 2022 \\ Vienna, Austria \\ September 17, 2022} \begin{document} \frame{\titlepage} \begin{frame} \frametitle{How I learned to stop worrying and yank the \USB} \centering \url{https://www.NetBSD.org/gallery/presentations/riastradh/eurobsdcon2022/opendetach.pdf} \vspace{\baselineskip} \qrcode[height=2in]{https://www.NetBSD.org/gallery/presentations/riastradh/eurobsdcon2022/opendetach.pdf} \end{frame} \begin{frame}[fragile] \frametitle{Devices in \BSD: autoconf(9) and /dev nodes} \begin{itemize} \item autoconf(9) instances in kernel: pci0, pchb0, ppb1, wsdisplay0, xhci2, \dots \begin{itemize} \item Bundle of related driver state for a hardware device \item Organized in a tree based on hardware \item Discovered at boot by bus enumeration and on hotplug events \item match, attach, detach \end{itemize} \item /dev nodes (chardevs, blockdevs) for userland interface: /dev/uhid0, /dev/ttyU1, /dev/rsd3a, /dev/zero, \dots \begin{itemize} \item Software interface for userland (char) or file systems (block) \item State may be: \begin{itemize} \item backed by autoconf instance \item allocated in software: `cloning devices' \item stateless: /dev/zero, /dev/null, /dev/mem, \dots \end{itemize} \item Access bracketed by open and close, as with files \item open, read/write/ioctl/strategy/\dots, close \end{itemize} \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{autoconf example: ualea(4)} \tiny \begin{verbatim} static int ualea_match(device_t parent, cfdata_t match, void *aux) { struct usbif_attach_arg *uiaa = aux; if (usb_lookup(ualea_devs, uiaa->uiaa_vendor, 
uiaa->uiaa_product)) return UMATCH_VENDOR_PRODUCT; return UMATCH_NONE; } static void ualea_attach(device_t parent, device_t self, void *aux) { struct usbif_attach_arg *uiaa = aux; struct ualea_softc *sc = device_private(self); ... } static int ualea_detach(device_t self, int flags) { ... } CFATTACH_DECL_NEW(ualea, sizeof(struct ualea_softc), ualea_match, ualea_attach, ualea_detach, NULL); \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{cdevsw example: ulpt(4)} \tiny \begin{verbatim} static int ulptopen(dev_t dev, int flag, int mode, struct lwp *l) { struct ulpt_softc *sc = device_lookup_private(&ulpt_cd, ULPTUNIT(dev)); if (sc == NULL) return ENXIO; ... } static int ulptclose(dev_t dev, int flag, int mode, struct lwp *l) { ... } static int ulptread(dev_t dev, struct uio *uio, int flags) { struct ulpt_softc *sc = device_lookup_private(&ulpt_cd, ULPTUNIT(dev)); ... } ... const struct cdevsw ulpt_cdevsw = { .d_open = ulptopen, .d_close = ulptclose, .d_read = ulptread, .d_write = ulptwrite, ... }; \end{verbatim} \end{frame} \begin{frame}[fragile] \begin{verbatim} pci0 i915drmkms0 intelfb0 wsdisplay0 <- /dev/ttyE0 xhci0 usb0 uhub0 umass0 scsibus0 sd0 <- /dev/sd0a, /dev/sd0b, ... umass1 scsibus1 sd1 <- /dev/sd1a, /dev/sd1b, ... usb1 uhub1 uftdi0 ucom0 <- /dev/ttyU0, /dev/dtyU0 ... 
\end{verbatim} \end{frame} \begin{frame} \begin{tabular}{ll} \textit{device\_t} & \textit{/dev node (amd64)} \\ \\ uhid$N$ & /dev/uhid$N$ (chr maj=66 min=$N$) \\ \\ ucom$N$ & /dev/ttyU$N$ (chr maj=66 min=$N$) \\ & /dev/dtyU$N$ (chr maj=66 min=$\mathtt{0x80000} \mathbin| N$) \\ \\ sd$N$ & /dev/sd$N$a (blk maj=4 min=$64N$) \\ & /dev/sd$N$b (blk maj=4 min=$64N + 1$) \\ & \quad\vdots \\ & /dev/rsd$N$a (chr maj=13 min=$64N$) \\ & /dev/rsd$N$b (chr maj=13 min=$64N + 1$) \\ & \quad\vdots \\ \\ (cloning) & /dev/audio$N$ (chr maj=42 min=$\mathtt{0x80} \mathbin| N$) \\ \\ (stateless) & /dev/null \end{tabular} \end{frame} \begin{frame} \frametitle{Easy timeline} \begin{enumerate} \item \texttt{foo\_attach} when device plugged in \item \texttt{foo\_open} when program opens /dev node \item \texttt{foo\_read}/\texttt{write}/\texttt{ioctl} when program does I/O on file \item \texttt{foo\_close} when program closes file \item \texttt{foo\_detach} when device unplugged after no longer in use \end{enumerate} \end{frame} \begin{frame} \frametitle{Easy timeline} \begin{enumerate} \item attach \item open \item read/write/ioctl \item close \item detach \end{enumerate} \end{frame} \begin{frame} \frametitle{\sout{Easy} Naive timeline} \begin{enumerate} \item attach \item open \item read/write/ioctl \item close \item detach \end{enumerate} \end{frame} \begin{frame} \frametitle{Complication: device yanked while open?} \begin{enumerate} \item attach \item open \item read/write/ioctl \item detach \item more read/write/ioctl \item close \end{enumerate} \end{frame} \begin{frame} \frametitle{Complication: no device to open?} \begin{enumerate} \item open \only<2->{$\Longrightarrow$ must fail} \item attach \item detach \item open \only<2->{$\Longrightarrow$ must fail} \end{enumerate} \end{frame} \begin{frame} \frametitle{Complication: device yanked in the middle of open?} \begin{enumerate} \item attach \item open called \item detach \item open returns \begin{itemize} \item<2-> success? 
\item<3-> failure? \item<4-> crash? \end{itemize} \end{enumerate} \end{frame} \begin{frame} \frametitle{Complication: concurrent open?} \begin{enumerate} \item attach \item \begin{minipage}[t]{0.5\linewidth} Thread 1 \begin{enumerate} \item open \item read/write/ioctl \item close \end{enumerate} \end{minipage}%
\begin{minipage}[t]{0.5\linewidth} Thread 2 \begin{enumerate} \item open \begin{itemize} \item<2-> succeed? (multi-open?) \item<3-> fail? (exclusive only?) \item<4-> crash? (oops) \end{itemize} \end{enumerate} \end{minipage}%
\item detach \end{enumerate} \end{frame} \begin{frame} \frametitle{Complication: concurrent open and close?} If opened multiple times, \texttt{struct cdevsw::d\_close} is called for \emph{last} close only, until next open. \begin{enumerate} \item attach \item T1: open \only<2->{$\Longrightarrow$ call \texttt{d\_open}} \item T2: open \only<3->{$\Longrightarrow$ call \texttt{d\_open} again} \item T1: close \only<4->{$\Longrightarrow$ no driver callback} \item T2: close \only<5->{$\Longrightarrow$ call \texttt{d\_close}} \item detach \end{enumerate} \end{frame} \begin{frame} \frametitle{Complication: open can fail} \begin{enumerate} \item attach \item open called \only<2->{$\Longrightarrow$ call \texttt{d\_open}} \item open fails \only<3->{$\Longrightarrow$ no \texttt{d\_close} callback---\texttt{d\_close} is called only after a successful open} \item detach \end{enumerate} \end{frame} \begin{frame} \frametitle{Complication: concurrent open and close, but open fails?} \begin{enumerate} \item attach \item T1: open \only<2->{$\Longrightarrow$ call \texttt{d\_open}} \item T2: open called \only<3->{$\Longrightarrow$ call \texttt{d\_open} again} \item T1: close \only<4->{$\Longrightarrow$ no driver callback} \item T2: open fails \only<5->{$\Longrightarrow$ call \texttt{d\_close}, despite failure in \emph{this thread}} \item detach \end{enumerate} \end{frame} \begin{frame} \frametitle{Detach} \begin{enumerate} \item Detach triggered by yanking removable device \item Must free resources 
allocated by attach \item<2-> But what if device is still open? \end{enumerate} \end{frame} \begin{frame} \frametitle{Clearing a road for repaving} How do you clear a road for repaving? \begin{enumerate} \item<2-> \only<2>{Bulldoze it and lay rail for a tram line instead} \only<3>{\sout{Bulldoze it and lay rail for a tram line instead}} \only<4->{Close it off so no new cars can enter} \item<5-> If existing cars are parked, leave a note they need to move \item<6-> Wait for all the cars to leave \end{enumerate} \strut\only<7->{It is now safe to repave the road% \only<8->{ and put in a separated bike lane}.} \end{frame} \begin{frame} \frametitle{Freeing a resource in use} How do you free a resource that may be in use? \begin{enumerate} \item Close it off so no new users can start using it \item If existing users are sleeping indefinitely, wake them \item Wait for all the users to finish \end{enumerate} \strut It is now safe to free the resource. \end{frame} \begin{frame} \frametitle{Detaching an open device} How do you free resources of an autoconf instance with open device nodes using it? \begin{enumerate} \item Prevent new opens \item Interrupt pending I/O (read/write/ioctl) \item Wait for opens and I/O to finish \end{enumerate} It is now safe to free the resources. \pause Difficult---or impossible---to get right inside a driver. \pause Many drivers need this fixed. Can we make it easy to fix them all? 
\end{frame} \begin{frame}[fragile] \frametitle{device\_t references} \begin{verbatim} dev_t dev; // maj/min num of /dev node device_t dv; // autoconf instance ptr struct foo_softc *sc; // driver private state dv = device_lookup(&foo_cd, FOOUNIT(dev)); if (dv == NULL) return ENXIO; sc = device_private(dv); \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{device\_t references} \begin{verbatim} dev_t dev; // maj/min num of /dev node device_t dv; // autoconf instance ptr struct foo_softc *sc; // driver private state dv = device_lookup(&foo_cd, FOOUNIT(dev)); if (dv == NULL) return ENXIO; sc = device_private(dv); /* dv may be detached and sc freed at this point */ \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{device\_t references} \begin{verbatim} dev_t dev; // maj/min num of /dev node device_t dv; // autoconf instance ptr struct foo_softc *sc; // driver private state dv = device_lookup_acquire(&foo_cd, FOOUNIT(dev)); if (dv == NULL) return ENXIO; sc = device_private(dv); /* dv cannot be detached nor sc freed here */ device_release(dv); \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{device\_t references and bdevsw/cdevsw d\_open} \begin{verbatim} const struct cdevsw foo_cdevsw = { .d_open = fooopen, ... .d_cfdriver = &foo_cd, .d_devtounit = dev_minor_unit, ... }; \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{device\_t references and bdevsw/cdevsw d\_open} \begin{verbatim} static int fooopen(dev_t dev, int flag, int mode, struct lwp *l) { device_t dv = device_lookup(&foo_cd, dev_minor_unit(dev)); struct foo_softc *sc; if (dv == NULL) return ENXIO; sc = device_private(dv); /* dv and sc stable until return */ ... 
} \end{verbatim} \end{frame} \begin{frame} \frametitle{device\_t references and bdevsw/cdevsw d\_open} \begin{itemize} \item Minimal changes needed to drivers to make device\_lookup safe in d\_open: \begingroup \smallskip\advance\leftskip2em\advance\rightskip2em\relax Add d\_cfdriver and d\_devtounit to \makebox{struct cdevsw}. \par\smallskip \endgroup \item<2-> Note: d\_devtounit must match the unit number that d\_open passes to device\_lookup! \item<3-> Some prefab d\_devtounit functions: \begin{itemize} \item dev\_minor\_unit \item disklabel\_dev\_unit \item tty\_unit \end{itemize} \end{itemize} \end{frame} \begin{frame} \frametitle{Digression: revoke(2) and tty security} \begin{itemize} \item \BSD-specific syscall: revoke(2) \item<2-> On boot, getty(8) opens tty and calls login(1) \item<3-> On successful authentication, login(1) chowns tty to login user \item<4-> After logout, getty(8) chowns tty back to root\par \strut\only<5->{$\Longrightarrow$ user can't open tty \only<7->{\emph{anew}}} \item<6-> getty(8) then revokes tty\par \strut\only<7->{$\Longrightarrow$ user's \emph{existing} opens of tty cease to work} \end{itemize} \end{frame} \begin{frame} \frametitle{Detaching an open device: revoke} \begin{itemize} \item Detach function must revoke open instances before freeing \begin{itemize} \item via \texttt{vdevgone} on the device major number and minor number range \end{itemize} \item Forces d\_close to be called \end{itemize} \end{frame} \begin{frame} \frametitle{Closing an open file in use} What if read, write, or ioctl is still in progress when close happens? \pause \vspace{\baselineskip} Choices of semantics: \begin{description} \item[Linux] Driver state lingers indefinitely until all pending I/O completes. \item[\BSD] I/O is interrupted and fails immediately so driver state can be freed synchronously. \end{description} \pause Focus on \BSD\ semantics here, not merits of choice. 
\end{frame} \begin{frame} \frametitle{Closing an open file in use} Driver must: \begin{enumerate} \item Prevent new I/O operations \item Interrupt pending I/O operations \item Wait for I/O to finish \end{enumerate} It is now safe to free the driver state. \end{frame} \begin{frame} \frametitle{Closing an open file in use} \NetBSDcurrent\ helps with this. Two approaches: \begin{itemize} \item Legacy drivers: d\_close only. \item Newer drivers: d\_cancel and d\_close. \end{itemize} \end{frame} \begin{frame} \frametitle{Legacy drivers: d\_close only} On close or revoke, \NetBSDcurrent\ will: \begin{itemize} \item prevent new I/O operations from starting (d\_open, d\_read, d\_write, \dots) \pause \item call d\_close, which must interrupt pending I/O and wait for it to complete. \end{itemize} \pause\vspace{\baselineskip} Problem: Most drivers don't wait. \begin{itemize} \item Stop-gap: after d\_close returns, \NetBSDcurrent\ will wait for any concurrent d\_open, d\_read, d\_write, d\_ioctl, etc., before revoke(2) or vdevgone(9) returns. \end{itemize} \pause\vspace{\baselineskip} Note: for drivers where d\_open can hang indefinitely, such as ttys, d\_close must be able to interrupt hanging d\_open! \end{frame} \begin{frame} \frametitle{Newer drivers: d\_cancel and d\_close} On close or revoke, \NetBSDcurrent\ will: \begin{itemize} \item prevent new I/O operations from starting (d\_open, d\_read, d\_write, \dots) \pause \item call d\_cancel, which must interrupt I/O and return promptly \pause \item wait for any concurrent d\_open, d\_read, d\_write, d\_ioctl, etc., to return \pause \item call d\_close, which now has \emph{exclusive access} to this device (chr/blk, major, minor) \end{itemize} \pause This way, drivers don't need custom logic to wait for pending I/O to drain---generic kernel logic takes care of it. \pause\vspace{\baselineskip} Note: for drivers where d\_open can hang indefinitely, such as ttys, d\_cancel must be able to interrupt hanging d\_open! 
New \texttt{ttycancel} function can be used for d\_cancel in most or all tty drivers. \end{frame} \begin{frame}[fragile] \small \begin{verbatim} static int uhidread(dev_t dev, struct uio *uio, int flag) { struct uhid_softc *sc = device_lookup_private(&uhid_cd, UHIDUNIT(dev)); ... mutex_enter(&sc->sc_lock); while (sc->sc_q.c_cc == 0) { ... if (sc->sc_closing) { mutex_exit(&sc->sc_lock); return EIO; } error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); if (error) break; } ... } \end{verbatim} \end{frame} \begin{frame}[fragile] \small \begin{verbatim} static int uhidcancel(dev_t dev, int flag, int mode, struct lwp *l) { struct uhid_softc *sc = device_lookup_private(&uhid_cd, UHIDUNIT(dev)); if (sc == NULL) return 0; /* Interrupt pending I/O, make it fail promptly. */ mutex_enter(&sc->sc_lock); sc->sc_closing = true; cv_broadcast(&sc->sc_cv); mutex_exit(&sc->sc_lock); uhidev_stop(sc->sc_hdev); return 0; } \end{verbatim} \end{frame} \begin{frame}[fragile] \begin{verbatim} static int uhid_detach(device_t self, int flags) { struct uhid_softc *sc = device_private(self); int maj, mn; /* locate the major number */ maj = cdevsw_lookup_major(&uhid_cdevsw); /* Forcibly close any open instances. */ mn = device_unit(self); vdevgone(maj, mn, mn, VCHR); /* Safe to free resources now! */ ... } \end{verbatim} \end{frame} \begin{frame} \frametitle{Interrupted open must restart} If d\_open sleeps, and d\_cancel or d\_close wakes it \pause (e.g., in a tty driver), \pause after wakeup, permissions checked before d\_open may have changed, \pause so d\_open \emph{must} return \texttt{ERESTART} to restart the system call and redo the permissions checks. 
\end{frame} \begin{frame} \frametitle{New driver contract: summary} Set d\_cfdriver and d\_devtounit to match device\_lookup use in d\_open; in exchange: \begin{itemize} \item detach prevents new d\_open from starting \item device\_lookup result in d\_open is stable \end{itemize} Set d\_cancel to interrupt pending I/O (including open) and return promptly; in exchange: \begin{itemize} \item d\_close has exclusive access to (chr/blk, maj, min) triple among concurrent devsw functions \item No further I/O (including d\_open) possible until d\_close returns \end{itemize} \pause (Lots of detailed edge cases handled behind the scenes in spec\_vnops.c---very hairy!) \end{frame} \begin{frame} \frametitle{Usage model} \begin{enumerate} \item attach \item while attached: \begin{enumerate}[(a)] \item d\_open on first open \begin{enumerate}[(i)] \item I/O: $(\text{d\_open} \mathbin| \text{d\_read} \mathbin| \text{d\_write} \mathbin| \text{d\_ioctl} \mathbin| \dots)^*$ \item d\_cancel---then \NetBSD\ waits for existing I/O to finish \end{enumerate} \item d\_close on last close \end{enumerate} \item \texttt{vdevgone} returns in detach; no more I/O possible \end{enumerate} \vspace{2\baselineskip} (for drivers with d\_cancel) \end{frame} \begin{frame} \frametitle{Questions?} \centering\scalebox{10}{\rmfamily\large?} \end{frame} \end{document}