\documentclass{beamer} \usepackage{amsmath} \usepackage[T1]{fontenc} \usepackage{graphicx} \usepackage[utf8]{inputenc} \usepackage{qrcode} \usepackage[normalem]{ulem} \title{Flipping Bits: Memory Errors in the Machine} \author{Taylor R Campbell \\ \texttt{riastradh@NetBSD.org}} \date{EuroBSDCon 2024 \\ Dublin, Ireland \\ September 21, 2024} \newcommand{\tlap}[1]{\raisebox{0pt}[0pt][0pt]{\mbox{#1}}} \makeatletter \DeclareRobustCommand\underarrow[1]{\mathop{\vtop{\m@th\ialign{##\crcr $\hfil\displaystyle{#1}\hfil$\crcr \noalign{\kern3\p@\nointerlineskip}% $\pmb\uparrow$\crcr\noalign{\kern3\p@}}}}\limits} \makeatother \begin{document} \frame{\titlepage} \begin{frame} \frametitle{Flipping Bits: Memory Errors in the Machine} \centering \url{https://www.NetBSD.org/gallery/presentations/riastradh/eurobsdcon2024/memerr.pdf} \vspace{\baselineskip} \qrcode[height=2in]{https://www.NetBSD.org/gallery/presentations/riastradh/eurobsdcon2024/memerr.pdf} \end{frame} \begin{frame}[fragile] \frametitle{Flipping Bits: Memory Errors in the Machine} \scriptsize\ttfamily\obeylines\obeyspaces\def {\mbox{\space}}% \$ git status On branch trunk Changes not staged for commit: (use "git add/rm ..." to update what will be committed) (use "git restore ..." to discard changes in working directory) \textcolor{red}{deleted: "e\textbackslash370ternal/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/ cpls.cc"} \vspace{\baselineskip} Untracked files: (use "git add ..." to include in what will be committed) \textcolor{red}{external/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/cpls.cc} \end{frame} \begin{frame}[fragile] \frametitle{Flipping Bits: Memory Errors in the Machine} \scriptsize\ttfamily\obeylines\obeyspaces\def {\mbox{\space}}% \$ git status On branch trunk Changes not staged for commit: (use "git add/rm ..." to update what will be committed) (use "git restore ..." 
to discard changes in working directory) \textcolor{red}{deleted: "e\tlap{\color{black}$\underbrace{\mbox{\color{red}\textbackslash370}}$}ternal/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/ cpls.cc"} \vspace{\baselineskip} Untracked files: (use "git add ..." to include in what will be committed) \textcolor{red}{e\tlap{\color{black}$\underarrow{\mbox{\color{red}x}}$}ternal/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/cpls.cc} \end{frame} \begin{frame} \frametitle{Memory error caught in the act} \footnotesize\ttfamily\raggedright\obeylines e\textbackslash370ternal/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/cpls.cc external/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/cpls.cc \end{frame} \begin{frame} \frametitle{Memory error caught in the act} \footnotesize\ttfamily\raggedright\obeylines e\textbackslash370ternal/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/cpls.cc e\textbackslash170ternal/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/cpls.cc \end{frame} \begin{frame} \frametitle{Memory error\only<2->{s} not caught in the act} NetBSD problem reports at \url{https://gnats.NetBSD.org} that I filed before I realized it was bad RAM: \begin{description} \item[kern/57009] \href{https://gnats.NetBSD.org/57009}{zfs crash in sa\_handle\_destroy \textleftarrow\ zfs\_zinactive \textleftarrow\ zfs\_netbsd\_reclaim} \item[kern/57020]<2-> \href{https://gnats.NetBSD.org/57020}{kernel diagnostic assertion \texttt{!RB\_SENTINEL\_P(tree->rbt\_root)} failed: file \dots/sys/arch/x86/x86/pmap.c, line 2261} \item[kern/57024]<3-> \href{https://gnats.NetBSD.org/57024}{panic: solaris assert: \texttt{arc\_decompress(buf) == 0 (0x5 == 0x0)}, file: \dots/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c, line: 4962} \item[kern/57061]<4-> \href{https://gnats.NetBSD.org/57061}{null pointer dereference in zfs dnode\_buf\_evict\_async \textrightarrow\ dnode\_destroy} \end{description} \end{frame} \begin{frame} \frametitle{Coda} \begin{itemize} \item Repeated ZFS scrub turned up no problems% \footnote{ZFS can handle 
\emph{storage} corruption but not \emph{memory} corruption---see Yupu~Zhang, Abhishek Rajimwale, Andrea~C.~Arpaci-Dusseau, and Remzi~H.~Arpaci-Dusseau, `End-to-end Data Integrity for File Systems: A ZFS Case Study', USENIX FAST 2010. \url{https://www.usenix.org/conference/fast-10/end-end-data-integrity-file-systems-zfs-case-study}} \item Ran BIOS diagnostics for multiple days straight \item Narrowed it down to one of two 32 GB DIMMs \item Submitted RMA to RAM manufacturer citing BIOS tests \item Received replacements for both DIMMs in a week or two \item So far so good \item<2-> \dots as far as I know \end{itemize} \end{frame} \begin{frame} \frametitle{Acronym soup of memory errors} \begin{itemize} \item ECC: Error-correcting codes \item SECDED: Single error correction, double error detection \item EDAC: Error detection and correction \item IID: Independent and identically distributed \end{itemize} \end{frame} \begin{frame} \frametitle{Error detection example: parity bit} \begin{itemize} \item Data bits $d_1 d_2 \dotsc d_n$ have parity bit $p := d_1 \oplus d_2 \oplus \dotsb \oplus d_n$ appended \item Flipping bit $d_k$ to $d'_k = d_k \oplus 1$ gives \[p' := d_1 \oplus d_2 \oplus \dotsb \oplus (d_k \oplus 1) \oplus \dotsb \oplus d_n = p \oplus 1\] \item Data word is corrupt if $p' \ne p$ \end{itemize} \end{frame} \begin{frame} \frametitle{Error correction example: Hamming (7,4) SECDED code} \begin{itemize} \item Four-bit data words $d_1 d_2 d_3 d_4$ encoded as seven-bit code words with three parity bits $p_1 p_2 p_3$ \item $p_1 := d_1 \oplus d_2 \oplus d_4$ \item $p_2 := d_1 \oplus d_3 \oplus d_4$ \item $p_3 := d_2 \oplus d_3 \oplus d_4$ \item If $p_2$ is right but $p_1$ and $p_3$ are wrong, bit $d_2$ was probably flipped---correctable \item If $p_2$ and $p_3$ are right but $p_1$ is wrong, at least two bits must have been flipped, but we don't know which---detected but not correctable \end{itemize} Many other examples in practice for 64-bit RAM words or larger 
units: Hamming codes, BCH codes, Chipkill, \dots (No Galois theory in this talk.) \end{frame} \begin{frame} \frametitle{EDAC threat model: IID bit flips} \begin{itemize} \item EDAC is \emph{not} security against intelligent adversary \item Assumption: EDAC adversary flips each bit independently with equal probability of flipping any bit---IID \begin{itemize} \item Fancier assumptions: one of four chips may fail altogether---chipkill \end{itemize} \item Non-assumption: Cryptography adversary carefully chooses which bits to flip, requires secret keys and message authentication codes to detect forgery \item<2-> (\dots but there is modern cryptography based on \emph{secret} error-correcting codes, like McEliece) \end{itemize} \end{frame} \begin{frame} \frametitle{What causes memory errors?} \begin{itemize} \item Cosmic rays \item $\alpha$-particles \item Electromagnetic pulses \item Overheating \item Faulty electrical connections \end{itemize} \end{frame} \begin{frame} \frametitle{Where errors can happen} \begin{itemize} \item Hard disks and other persistent storage \item DRAM module \item Memory interconnect \item PCI interconnect \item CPU caches \item CPU registers \end{itemize} \end{frame} \begin{frame} \frametitle{Error severity} \begin{itemize} \item\textbf{Corrected}---% No data lost \item\textbf{Uncorrectable recoverable}---% Data lost, but scope of loss is known, e.g.\ limited to a known word or cache line or page \begin{itemize} \item If page is unused, no problem \item If page is used by userland process, can kill process without other adverse consequences \item If page is used by VM guest, can terminate that VM guest but not others \end{itemize} \item\textbf{Uncorrectable fatal}---% Data lost and corrupt data may have spread arbitrarily far before detection \begin{itemize} \item Corrupt data got copied into cache lines or registers before detection \item Reliable recovery impossible \end{itemize} \end{itemize} \end{frame} \begin{frame} \frametitle{Error 
persistence} \begin{itemize} \item\textbf{Soft error}---% at location in memory independent of other errors, e.g.\ cosmic ray flipped a bit \item\textbf{Hard error}---% at location of flaky memory, will probably continue to flip bits in the same place \end{itemize} \end{frame} \begin{frame} \frametitle{Error reporting} \begin{itemize} \item\textbf{Synchronous}---% delivered by nonmaskable interrupt when CPU loads corrupted memory \item\textbf{Asynchronous}---% delivered by low-priority interrupt or polling when background memory scrubber runs \end{itemize} \end{frame} \begin{frame} \frametitle{Practical visibility of EDAC} How do \emph{you} know when you got a memory error? \end{frame} \begin{frame}[fragile] \frametitle{Practical visibility of EDAC} Preferably not like this: \vspace{\baselineskip} % XXX Why? \scriptsize\ttfamily\obeylines\obeyspaces\def {\mbox{\space}}% \$ git status On branch trunk Changes not staged for commit: (use "git add/rm ..." to update what will be committed) (use "git restore ..." to discard changes in working directory) \textcolor{red}{deleted: "e\textbackslash370ternal/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/ cpls.cc"} \vspace{\baselineskip} Untracked files: (use "git add ..." 
to include in what will be committed) \textcolor{red}{external/gpl3/gdb/dist/gdb/testsuite/gdb.linespec/cpls.cc} \end{frame} \begin{frame}[fragile] \frametitle{Practical visibility of EDAC} Preferably more like this: \scriptsize \begin{verbatim} [ 4939.045145] apei0: error source 1 reported hardware error: severity=corrected nentries=1 status=0x12 [ 4939.055146] apei0: error source 1 entry 0: SectionType={0xa5bc1114, 0x6f64,0x4ede,0xb8b8,{0x3e,0x83,0xed,0x7c,0x83,0xb1}} (memory error) [ 4939.075146] apei0: error source 1 entry 0: ErrorSeverity=2 (corrected) [ 4939.075146] apei0: error source 1 entry 0: Revision=0x201 [ 4939.085146] apei0: error source 1 entry 0: Flags=0x1 [ 4939.085146] apei0: error source 1 entry 0: FruText=CorrectedErr [ 4939.095147] apei0: error source 1 entry 0: MemoryErrorType=8 (PARITY_ERROR) \end{verbatim} \end{frame} \begin{frame} \frametitle{Practical visibility of EDAC} \begin{itemize} \item DDR memory controller is hardware device with registers \item Documented only under super-secret vendor NDA \end{itemize} \end{frame} \begin{frame} \frametitle{Real-world prevalence of memory errors} Vendors insist uncorrectable error probability with scrubber is so negligible, why even bother checking? \end{frame} \begin{frame} \frametitle{Real-world prevalence of memory errors} \begin{quotation} \noindent [W]e observe DRAM error rates that are orders of magnitude higher than previously reported, with 25,000--70,000 errors per billion device hours per Mb and more than 8\% of DIMMs affected by errors per year. 
We provide strong evidence that memory errors are dominated by hard errors, rather than soft errors, which previous work suspects to be the dominant error mode.\footnote{% Bianca Schroeder, Eduardo Pinheiro, and Wolf-Dietrich Weber, `DRAM errors in the wild: a large-scale field study', Communications of the ACM 54(2), 2011, pp.~100--107, \url{https://dl.acm.org/doi/10.1145/1897816.1897844}.} \end{quotation} \end{frame} \begin{frame} \frametitle{Support for EDAC} \begin{itemize} \item Not all hardware with `ECC RAM' does anything with it! \item Intel Xeon server-class CPUs support ECC RAM, but not desktop/mobile-class CPUs \item $\sim$All AMD CPUs can support ECC RAM \item \dots but some motherboards that physically accept ECC RAM just don't do anything with it! \end{itemize} Must confirm RAM, motherboard, CPU, \emph{and} firmware support EDAC! \end{frame} \begin{frame} \frametitle{Testing EDAC} How do you know what will happen when you get a memory error? \end{frame} \begin{frame} \frametitle{Testing EDAC} \begin{itemize}[<+->] \item Send a cosmic ray at your RAM \begin{itemize} \item Problem: Suns are hard to steer and aim \end{itemize} \item Hold an $\alpha$-emitter up to your RAM \begin{itemize} \item Problem: Polonium-210 is difficult to procure after 2006 \end{itemize} \item Electromagnetic pulse (EMP) gun \begin{itemize} \item Problem: \$4,125 and doesn't ship before EuroBSDCon \end{itemize} \item Rowhammer \begin{itemize} \item Problem: Requires doing science on your RAM to apply a rowhammer attack \end{itemize} \item Error injection \begin{itemize} \item Problem: Not all hardware supports it \item \dots but if your hardware does, this is the easiest option \end{itemize} \end{itemize} \end{frame} \begin{frame} \frametitle{Testing EDAC with error injection} Error injection in principle: \begin{itemize} \item Write to hardware register in memory controller \item Error report comes flying out as if real error \item Engineer confirms it works, moves on to other 
task \end{itemize} \end{frame} \begin{frame} \frametitle{Testing EDAC with error injection} Error injection in practice: \begin{itemize}[<+->] \item Wait, how can I control which memory location gets corrupted---make sure it's in an unused test page? \begin{itemize} \item Vendor: You can't. Wherever is the next memory transaction in your highly parallel multicore system with DMA engines doing I/O! \end{itemize} \item Wait, injecting a \emph{correctable} error actually corrupted memory? \begin{itemize} \item panic: diagnostic assertion: ``critical data structure hopelessly destroyed'' failed at kernel.c line 4 \item Workaround: Inject error in parity bits, not data bits, if you can specify error pattern or syndrome \end{itemize} \item Wait, why isn't the machine responding at serial console? \dots or ILO? \dots or remote reset? \dots or\dots \begin{itemize} \item Engineer treks across town to the data center to unplug it and plug it back in again \end{itemize} \end{itemize} \end{frame} \begin{frame} \frametitle{APEI: ACPI Platform Error Interface} % mbox because otherwise this breaks the line after `https://' APEI:\footnote{\mbox{\url{https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html}}} Standard interface in ACPI abstracting EDAC device registers---WARNING: ETLA overload \begin{description} \item[\texttt{BERT}] Boot Error Record Table \item[\texttt{HEST}] Hardware Error Source Table \item[\texttt{EINJ}] Error INJection \item[\texttt{ERST}] Error Record Serialization Table \end{description} Available on \emph{some} server-class machines---check with \texttt{acpidump -dt} or similar \end{frame} \begin{frame}[fragile] \frametitle{APEI: ACPI Platform Error Interface} \footnotesize \begin{verbatim} apei0 at acpi0: ACPI Platform Error Interface apei0: BERT: OemId < AMI,AMI BERT,00000000> AslId < ,00000000> apei0: BERT: 0x14 bytes at 0x7f340c98 apei0: BERT: no boot errors recorded apei0: EINJ: OemId < AMI,AMI EINJ,00000000> AslId < ,00000000> apei0: EINJ: 
can inject: 0x28 apei0: ERST: OemId < AMIER,AMI ERST,00000000> AslId < ,00000000> apei0: ERST: 0 records in error log 8192 bytes @ 0x7f248050 attr=0 apei0: HEST: OemId < AMI,AMI HEST,00000000> AslId < ,00000000> apei0: HEST: 2 hardware error sources \end{verbatim} \end{frame} \begin{frame} \frametitle{APEI BERT: Boot Error Record Table} Provides error reports early at boot, before OS is listening for active notifications \end{frame} \begin{frame} \frametitle{APEI HEST: Hardware Error Source Table} \begin{itemize} \item Lists sources of hardware error reports \item Covers more than just memory errors---also PCI errors, CPU errors, \dots \item Software can respond to non-memory hardware errors by, e.g., disabling a single faulty PCI device \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{APEI HEST: Hardware Error Source Table} \footnotesize \begin{verbatim} Type={Generic Hardware Error Source} SourceId=0 Enabled={YES} Number of Records to pre-allocate=1 Max Sections per Record=1 Max Raw Data Length=157 Error Status Address=0x000000007f235018:0[64] (Memory) HW Error Notification={ Type={NMI} Length=28 Config Write Enable={} Poll Interval=0 msec Interrupt Vector=2 Switch To Polling Threshold Value=0 Switch To Polling Threshold Window=0 msec Error Threshold Value=0 Error Threshold Window=0 msec } Error Block Length=157 \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{APEI HEST: Hardware Error Source Table} \footnotesize \begin{verbatim} Type={Generic Hardware Error Source} SourceId=1 Enabled={YES} Number of Records to pre-allocate=1 Max Sections per Record=1 Max Raw Data Length=157 Error Status Address=0x000000007f2350c0:0[64] (Memory) HW Error Notification={ Type={POLLED} Length=28 Config Write Enable={POLL_INTERVAL,POLL_THRESHOLD_VALUE, POLL_THRESHOLD_WINDOW,ERR_THRESHOLD_VALUE, ERR_THRESHOLD_WINDOW} Poll Interval=60000 msec Interrupt Vector=2 Switch To Polling Threshold Value=0 Switch To Polling Threshold Window=0 msec Error Threshold Value=0 
Error Threshold Window=0 msec } Error Block Length=157 \end{verbatim} \end{frame} \begin{frame} \frametitle{APEI EINJ: Error Injection table} \begin{itemize}[<+->] \item \only<1>{List of supported error injection actions} \only<2->{\sout{List of supported error injection actions}} \item \only<1>{\phantom{Interpreter for programming language of supported error injection actions}} \only<2>{Interpreter for programming language of supported error injection actions} \only<3->{\sout{Interpreter for programming language of supported error injection actions}} \item Programming language for interpreter for programming language for supported error actions \end{itemize} \end{frame} \begin{frame} \frametitle{APEI EINJ: Error Injection table} Actions: \begin{itemize} \item BEGIN\_INJECTION\_OPERATION \item SET\_ERROR\_TYPE \item SET\_ERROR\_TYPE\_WITH\_ADDRESS \item EXECUTE\_OPERATION \item CHECK\_BUSY\_STATUS \item GET\_COMMAND\_STATUS \item GET\_TRIGGER\_ERROR\_ACTION\_TABLE \only<2->{\dots meta-action} \item \dots \end{itemize} \end{frame} \begin{frame} \frametitle{APEI EINJ: Error Injection table} Instructions: \begin{itemize} \item READ\_REGISTER \item READ\_REGISTER\_VALUE (read and compare w/immediate) \item WRITE\_REGISTER \item WRITE\_REGISTER\_VALUE (write immediate to register) \item NOOP \end{itemize} \end{frame} \begin{frame} \frametitle{APEI EINJ: Error Injection table} \scriptsize \begin{tabular}{llll} Action & Instruction & Register & Value \\[\smallskipamount] SET\_ERROR\_TYPE & WRITE\_REGISTER\_VALUE & 0x1234 & 0x42 \\ SET\_ERROR\_TYPE & WRITE\_REGISTER & 0x1238 & --- \\ SET\_ERROR\_TYPE & READ\_REGISTER & 0x123c & --- \\ EXECUTE\_OPERATION & READ\_REGISTER & 0x1000 & --- \\ SET\_ERROR\_TYPE & WRITE\_REGISTER & 0x1240 & --- \\ GET\_ERROR\_STATUS & READ\_REGISTER\_VALUE & 0x1200 & 0x8 \\ \vdots & \vdots & \vdots & \vdots \end{tabular} \end{frame} \begin{frame}[fragile] \frametitle{APEI EINJ: Error Injection table} \small \begin{verbatim} ACTION={Begin 
Operation} INSTRUCTION={Write Register Value} FLAGS={} RegisterRegion=0x7f236f98:0[8] (Memory) MASK=0x000000ff ACTION={Get Trigger Table} INSTRUCTION={Read Register} FLAGS={} RegisterRegion=0x000000007f236f9a:0[64] (Memory) MASK=0xffffffffffffffff ACTION={Set Error Type} INSTRUCTION={Write Register} FLAGS={} RegisterRegion=0x7f236fa2:0[32] (Memory) MASK=0xffffffff \end{verbatim} \end{frame} \begin{frame} \frametitle{APEI EINJ: Error Injection table} To inject an error, software must execute a sequence of actions: \begin{itemize} \item BEGIN\_INJECTION\_OPERATION \item SET\_ERROR\_TYPE(0x8=\textless Memory Correctable\textgreater) \item EXECUTE\_OPERATION \item busy-wait until CHECK\_BUSY\_STATUS returns completion \item check GET\_COMMAND\_STATUS \item execute the GET\_TRIGGER\_ERROR\_ACTION\_TABLE instructions \end{itemize} \end{frame} \begin{frame} \frametitle{APEI ERST: Error Record Serialization Table} \begin{itemize}[<+->] \item \only<1>{Persistent storage for small files} \only<2->{\sout{Persistent storage for small files}} \item \only<1>{\phantom{Programming language for reading and writing small files}} \only<2>{Programming language for reading and writing small files} \only<3->{\sout{Programming language for reading and writing small files}} \item \only<1-2>{\phantom{Interpreter for programming language for reading and writing small files}} \only<3>{Interpreter for programming language for reading and writing small files} \only<4->{\sout{Interpreter for programming language for reading and writing small files}} \item Programming language for interpreter for programming language for reading and writing small files \end{itemize} \end{frame} \begin{frame} \frametitle{APEI ERST: Error Record Serialization Table} Actions: \begin{itemize} \item BEGIN\_WRITE\_OPERATION \item BEGIN\_READ\_OPERATION \item BEGIN\_CLEAR\_OPERATION \item END\_OPERATION \item EXECUTE\_OPERATION \item SET\_RECORD\_OFFSET \item CHECK\_BUSY\_STATUS \item GET\_COMMAND\_STATUS \item 
GET\_RECORD\_COUNT \item \dots \end{itemize} \end{frame} \begin{frame} \frametitle{APEI ERST: Error Record Serialization Table} Instructions: \begin{itemize} \item READ\_REGISTER, READ\_REGISTER\_VALUE \item WRITE\_REGISTER, WRITE\_REGISTER\_VALUE \item NOOP \item LOAD\_VAR1, LOAD\_VAR2 \item STORE\_VAR1 \item ADD, SUBTRACT \item ADD\_VALUE, SUBTRACT\_VALUE \item STALL, STALL\_WHILE\_TRUE \item SKIP\_NEXT\_INSTRUCTION\_IF\_TRUE \item GOTO \item SET\_SRC\_ADDRESS\_BASE \item SET\_DST\_ADDRESS\_BASE \item MOVE\_DATA \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{APEI ERST: Error Record Serialization Table} \small \begin{verbatim} ACTION={Set Record Offset} INSTRUCTION={Write Register} FLAGS={} RegisterRegion=0x7f24801c:0[32] (Memory) MASK=0xffffffff ACTION={Execute Operation} INSTRUCTION={Write Register Value} FLAGS={} RegisterRegion=0xb2:0[8] (IO) MASK=0x000000ff ACTION={Check Busy Status} INSTRUCTION={Read Register Value} FLAGS={} RegisterRegion=0x7f248020:0[8] (Memory) MASK=0x000000ff \end{verbatim} \end{frame} \begin{frame} \frametitle{APEI ERST: Error Record Serialization Table} \begin{itemize} \item Not completely bonkers: executing ERST actions is maybe less risky than running full ACPI interpreter \item Could use it to save dmesg or core dump on crash in delicate context (no memory allocation, no locks, \dots) \item Exposed in Linux as a file system `pstore' \item Not yet used by NetBSD---future work! \end{itemize} \end{frame} \begin{frame} \frametitle{NetBSD support} NetBSD support for APEI: \begin{itemize} \item \texttt{apei(4)} driver \item Enabled in current on x86/GENERIC, soon on Arm, aimed at 11.0 or maybe even 10.1 \item Supports detecting reports from common hardware error source types \item Supports crude sysctl interface to EINJ \item Code is there to interpret ERST action interpreter, but nothing uses it yet \end{itemize} \end{frame} \begin{frame} \frametitle{Live demo} (This space left intentionally blank. 
Hard to show cosmic rays in a slide.) \end{frame} \begin{frame} \frametitle{Questions?} \centering\Huge ? \end{frame} \end{document}