ref: 4696e9dc4fda1400c881f2177183da3b024633f1
parent: 01cf9608db97e2b4fe6bec921ab1c28201605f82
author: iriri <iri@konnichiwastevenspielbergde.su>
date: Tue Mar 12 21:58:49 EDT 2019
Fix use-after-free in futex-based semaphore implementations. The current implementation of `sem_post` first increments the value of the semaphore and then checks the waiter count to see if it must call `ftxwake`. However, it is possible for `sem_wait` to return at any time after the value has been incremented, meaning that the memory this check looks at might already be deallocated. Happily, it turns out the OS X futex operations actually take a 32 bit value which greatly simplifies things.
--- a/lib/sys/sys+osx-x64.myr
+++ b/lib/sys/sys+osx-x64.myr
@@ -835,8 +835,8 @@
-> int)
/* ulock */
- const ulock_wait : (op : ulockop, uaddr : uint64#, val : uint64, timeout : uint32 -> int)
- const ulock_wake : (op : ulockop, uaddr : uint64#, wakeval : uint64 -> int)
+ const ulock_wait : (op : ulockop, uaddr : void#, val : uint64, timeout : uint32 -> int)
+ const ulock_wake : (op : ulockop, uaddr : void#, wakeval : uint64 -> int)
extern const cstring : (str : byte[:] -> byte#)
/* filled by start code */
@@ -1110,7 +1110,7 @@
}
const ulock_wake = {op, uaddr, wakeval
- -> (syscall(Sysulock_wake, a(op), uaddr, a(wakeval)) : int)
+ -> (syscall(Sysulock_wake, a(op), uaddr, wakeval) : int)
}
const waitstatus = {st
--- a/lib/thread/futex+osx.myr
+++ b/lib/thread/futex+osx.myr
@@ -4,8 +4,13 @@
use "atomic"
use "common"
+/*
+The `ulock_` functions are undocumented but the relevant source can be found at
+https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/kern/sys_ulock.c
+*/
pkg thread =
- type ftxtag = uint64
+ /* `UL_COMPARE_AND_WAIT` only uses 32 bits of the value (line 407) */
+ type ftxtag = uint32
impl atomic ftxtag
const ftxwait : (uaddr : ftxtag#, val : ftxtag, tmout : std.time -> sys.errno)
@@ -13,16 +18,12 @@
const ftxwakeall : (uaddr : ftxtag# -> void)
;;
-/*
- * The ulock_ functions are undocumented but the relevant source can be found at
- * https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/kern/sys_ulock.c
- */
const ftxwait = {uaddr, val, tmout
var rc
if tmout < 0
while (rc = (sys.ulock_wait(sys.Ulockcompareandwait,
- (uaddr : uint64#),
+ (uaddr : void#),
(val : uint64),
0) : sys.errno)) == sys.Eintr
;;
@@ -34,7 +35,7 @@
t = (tmout : uint32)
while (rc = (sys.ulock_wait(sys.Ulockcompareandwait,
- (uaddr : uint64#),
+ (uaddr : void#),
(val : uint64),
t) : sys.errno)) == sys.Eintr
var now
@@ -69,17 +70,17 @@
}
const ftxwake = {uaddr
- sys.ulock_wake(sys.Ulockcompareandwait, (uaddr : uint64#), 0)
+ sys.ulock_wake(sys.Ulockcompareandwait, (uaddr : void#), 0)
}
const ftxwakeall = {uaddr
- sys.ulock_wake(sys.Ulockcompareandwait | sys.Ulockulfwakeall, (uaddr : uint64#), 0)
+ sys.ulock_wake(sys.Ulockcompareandwait | sys.Ulockulfwakeall, (uaddr : void#), 0)
}
impl atomic ftxtag =
- xget = {p; -> (xget64((p : uint64#)) : ftxtag)}
- xset = {p, v; xset64((p : uint64#), (v : uint64))}
- xadd = {p, v; -> (xadd64((p : uint64#), (v : uint64)) : ftxtag)}
- xcas = {p, old, new; -> (xcas64((p : uint64#), (old : uint64), (new : uint64)) : ftxtag)}
- xchg = {p, v; -> (xchg64((p : uint64#), (v : uint64)) : ftxtag)}
+ xget = {p; -> (xget32((p : uint32#)) : ftxtag)}
+ xset = {p, v; xset32((p : uint32#), (v : uint32))}
+ xadd = {p, v; -> (xadd32((p : uint32#), (v : uint32)) : ftxtag)}
+ xcas = {p, old, new; -> (xcas32((p : uint32#), (old : uint32), (new : uint32)) : ftxtag)}
+ xchg = {p, v; -> (xchg32((p : uint32#), (v : uint32)) : ftxtag)}
;;
--- a/lib/thread/sem+futex.myr
+++ b/lib/thread/sem+futex.myr
@@ -4,6 +4,10 @@
use "futex"
pkg thread =
+ /*
+ We want to be able to read both members in the same atomic operation
+ and this implementation assumes `ftxtag` is 32 bits.
+ */
type sem = struct
_val : ftxtag
_nwaiters : uint32
@@ -48,9 +52,20 @@
}
const sempost = {s
- std.assert((xadd(&s._val, 1) : uint32) != ~0x0, "error: semaphore overflowed\n")
+ /*
+ It's possible for the semaphore to be deallocated at any time after
+ `_val` is incremented so we must not dereference `s` a second time. We
+ work around this by also reading the value of `_nwaiters` during the
+ atomic fetch and add.
+ */
+ var state = xadd((s : uint64#), 1)
+ std.assert((state : uint32) != ~0x0, "error: semaphore overflowed\n")
- if xget(&s._nwaiters) > 0
+ if (state >> 32) > 0
+ /*
+ However, it is both legal and expected to pass a potentially
+ invalid address to `ftxwake`.
+ */
ftxwake(&s._val)
;;
}