-
Notifications
You must be signed in to change notification settings - Fork 1
/
semaphore.rs
744 lines (603 loc) · 22.5 KB
/
semaphore.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
cfg_if! {
if #[cfg(fuzz)] {
use syncbox_fuzz::{
futures::AtomicTask,
sync::{
CausalCell,
atomic::{AtomicUsize, AtomicPtr},
},
yield_now,
};
} else {
use CausalCell;
use _futures::task::AtomicTask;
use std::sync::atomic::{AtomicUsize, AtomicPtr};
// `Semaphore::pop` calls `yield_now()` in every build configuration, so the
// non-fuzz build needs its own definition in scope.
use std::thread::yield_now;
macro_rules! debug {
($($t:tt)*) => {};
}
}
}
use crossbeam_utils::CachePadded;
use _futures::Poll;
use std::fmt;
use std::ptr::{self, NonNull};
use std::sync::Arc;
use std::sync::atomic::Ordering::{self, Acquire, Release, AcqRel, Relaxed};
/// Futures-aware semaphore.
///
/// Permits are taken with `poll_permit` and handed back with `release_one`.
/// Callers that cannot get a permit immediately are parked on an intrusive
/// queue of `WaiterNode`s.
pub struct Semaphore {
/// Tracks both the waiter queue tail pointer and the number of remaining
/// permits.
///
/// The raw value is interpreted through `SemState`: it is either a permit
/// count tagged with `NUM_FLAG`, or a pointer to the queue tail.
state: CachePadded<AtomicUsize>,
/// Waiter queue head pointer.
///
/// Initialized to the stub node in `new`; read and advanced in `pop`.
head: CausalCell<NonNull<WaiterNode>>,
/// Coordinates access to the queue head.
///
/// Every `release_one` call increments this counter; only the caller that
/// observes a previous value of zero goes on to pop waiters.
rx_lock: AtomicUsize,
/// Stub waiter node used as part of the MPSC channel algorithm.
///
/// Its address is also stored in `state` when there are neither permits
/// nor queued waiters (see `SemState::acquire_permit`).
stub: Box<WaiterNode>,
}
/// Wait on a semaphore.
///
/// Wraps an optional, reference-counted `WaiterNode`. The node starts out as
/// `None` and is allocated by `Semaphore::poll_permit` the first time the
/// waiter has to be queued.
#[derive(Debug)]
pub struct Waiter(Option<Arc<WaiterNode>>);
/// Node used to notify the semaphore waiter when permit is available.
///
/// Nodes are linked into the semaphore's wait queue through `next`.
#[derive(Debug)]
struct WaiterNode {
/// Stores waiter state.
///
/// Holds a `NodeState` discriminant as a `usize`. See `NodeState` for more
/// details.
state: AtomicUsize,
/// Task to notify when a permit is made available.
task: AtomicTask,
/// Next pointer in the queue of waiters.
///
/// Null while this node is the last one in the queue.
next: AtomicPtr<WaiterNode>,
}
/// Semaphore state
///
/// The lowest bit (`NUM_FLAG`) selects how the rest of the `usize` is
/// interpreted:
///
/// - Bit set: the remaining bits (`value >> NUM_SHIFT`) hold the number of
///   available permits.
/// - Bit clear: the whole `usize` is a pointer to the tail of the waiter
///   queue. When no permits are left and nobody is queued, that pointer is
///   the address of the semaphore's stub node.
#[derive(Copy, Clone)]
struct SemState(usize);
/// Waiter node state
///
/// The discriminants are stored in `WaiterNode::state` as a plain `usize` and
/// decoded again by `NodeState::from_usize`, so the numeric values below must
/// stay in sync with that function.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[repr(usize)]
enum NodeState {
/// Not waiting for a permit and the node is not in the wait queue.
///
/// This is the initial state.
Idle = 0,
/// Not waiting for a permit but the node is in the wait queue.
///
/// This happens when the waiter has previously requested a permit, but has
/// since canceled the request. The node cannot be removed by the waiter, so
/// this state informs the thread releasing permits to skip the node when it
/// pops it from the wait queue (`WaiterNode::notify` moves it back to
/// `Idle` without waking a task).
Queued = 1,
/// Waiting for a permit and the node is in the wait queue.
QueuedWaiting = 2,
/// The waiter has been assigned a permit and the node has been removed from
/// the queue.
///
/// `WaiterNode::acquire` consumes the assignment and returns the node to
/// `Idle`.
Assigned = 3,
}
// ===== impl Semaphore =====
impl Semaphore {
/// Creates a new semaphore with the initial number of permits
///
/// # Panics
///
/// Panics if `permits` is zero.
pub fn new(permits: usize) -> Semaphore {
assert!(permits > 0, "permits must be greater than zero");
let stub = Box::new(WaiterNode::new());
let ptr = NonNull::new(&*stub as *const _ as *mut _).unwrap();
// Allocations are aligned
debug_assert!(ptr.as_ptr() as usize & NUM_FLAG == 0);
let state = SemState::new(permits);
Semaphore {
state: CachePadded::new(AtomicUsize::new(state.to_usize())),
head: CausalCell::new(ptr),
rx_lock: AtomicUsize::new(0),
stub,
}
}
/// Returns the number of permits that can currently be acquired.
///
/// Reports `0` whenever the state holds a waiter pointer instead of a count.
pub fn available_permits(&self) -> usize {
    SemState::load(&self.state, Acquire).available_permits()
}
/// Poll for a permit
///
/// Attempts to take one permit. When none is available and a `waiter` is
/// supplied, the current task is registered on the waiter's node and the node
/// is pushed onto the wait queue.
///
/// # Returns
///
/// - `Ok(Ready(()))` if a permit was acquired.
/// - `Ok(NotReady)` if the waiter was pushed onto the queue, or was already
///   queued.
/// - `Err(())` if no permit is available and no `waiter` was supplied.
pub fn poll_permit(&self, mut waiter: Option<&mut Waiter>) -> Poll<(), ()> {
use _futures::Async::*;
// Load the current state
let mut curr = SemState::load(&self.state, Acquire);
debug!(" + poll_permit; sem-state = {:?}", curr);
// Tracks a *mut WaiterNode representing an Arc clone.
//
// This avoids having to bump the ref count unless required. Once set, the
// raw pointer owns one strong reference that is either handed to the queue
// (successful push) or reclaimed below with `Arc::from_raw`.
let mut maybe_strong = None;
loop {
let mut next = curr;
if !next.acquire_permit(&self.stub) {
debug!(" + poll_permit -- no permits");
// Without permits, the state must hold a queue tail pointer.
debug_assert!(curr.waiter().is_some());
if maybe_strong.is_none() {
if let Some(ref mut waiter) = waiter {
// Get the waiter's node, or initialize one
let waiter = waiter.0
.get_or_insert_with(|| Arc::new(WaiterNode::new()));
// Register the task before the node becomes visible to the
// releasing side.
waiter.register();
debug!(" + poll_permit -- to_queued_waiting");
if !waiter.to_queued_waiting() {
debug!(" + poll_permit; waiter already queued");
// The node is already queued, there is no further work
// to do.
return Ok(NotReady);
}
maybe_strong = Some(WaiterNode::into_non_null(waiter.clone()));
} else {
// If no `waiter`, then the task is not registered and there
// is no further work to do.
return Err(());
}
}
// Make the waiter's node the new queue tail.
next.set_waiter(maybe_strong.unwrap());
}
debug!(" + poll_permit -- pre-CAS; next = {:?}", next);
debug_assert_ne!(curr.0, 0);
debug_assert_ne!(next.0, 0);
match next.compare_exchange(&self.state, curr, AcqRel, Acquire) {
Ok(_) => {
debug!(" + poll_permit -- CAS ok");
match curr.waiter() {
Some(prev_waiter) => {
let waiter = maybe_strong.unwrap();
// Finish pushing: link the previous tail to the new node.
unsafe {
prev_waiter.as_ref()
.next.store(waiter.as_ptr(), Release);
}
debug!(" + poll_permit -- waiter pushed");
return Ok(NotReady);
}
None => {
debug!(" + poll_permit -- permit acquired");
if let Some(waiter) = maybe_strong {
// The waiter was cloned, but never got queued: an
// earlier loop iteration saw no permits, but a permit
// became available before the CAS succeeded. Before
// entering `poll_permit`, the waiter was in the `Idle`
// state. We must transition the node back to the idle
// state and release the extra strong reference.
let waiter = unsafe { Arc::from_raw(waiter.as_ptr()) };
waiter.revert_to_idle();
}
return Ok(Ready(()));
}
}
}
Err(actual) => {
// Lost the race; retry against the freshly observed state.
curr = actual;
}
}
}
}
/// Release one permit back to the semaphore.
///
/// This either increments the number of available permits or notifies a
/// pending waiter.
///
/// `rx_lock` doubles as a lock and a counter of outstanding releases: the
/// caller that moves it away from zero performs the release work, including
/// any releases recorded by other callers while it was busy.
pub fn release_one(&self) {
    debug!(" + release_one");

    if self.rx_lock.fetch_add(1, AcqRel) != 0 {
        debug!("+ release_one; locked");
        // Another thread has the lock and will be responsible for notifying
        // pending waiters on our behalf.
        return;
    }

    // Number of permits still to hand back. It starts at our own permit and
    // is refreshed with whatever other callers added in the meantime.
    let mut pending = 1;

    loop {
        self.release_n(pending);

        let observed = self.rx_lock.fetch_sub(pending, AcqRel);
        pending = observed - pending;

        if pending == 0 {
            break;
        }
    }
}
/// Releases `n` permits to the semaphore.
///
/// Waiters are popped one at a time; each waiter that accepts the
/// notification consumes one permit. When the queue runs dry, `pop` stores
/// the remaining permits in the semaphore state and this function stops.
fn release_n(&self, mut n: usize) {
    while n > 0 {
        match self.pop(n) {
            // `pop` already credited the remaining permits to the state.
            None => break,
            Some(node) => {
                debug!(" + release_n -- notify");

                // A node whose waiter canceled does not consume a permit.
                if node.notify() {
                    n -= 1;
                    debug!(" + release_n -- dec");
                }
            }
        }
    }
}
/// Pop a waiter
///
/// `rem` represents the remaining number of times the caller will pop. If
/// there are no more waiters to pop, `rem` is used to set the available
/// permits.
///
/// Returns `Some(node)` with the node removed from the front of the queue, or
/// `None` once the queue is empty and `rem` permits have been added to the
/// semaphore state. The returned `Arc` takes over the strong reference that
/// `poll_permit` leaked into the queue via `WaiterNode::into_non_null`.
///
/// Whenever a concurrent push is observed half-way through, the function
/// yields and retries from the top.
fn pop(&self, rem: usize) -> Option<Arc<WaiterNode>> {
debug!(" + pop; rem = {}", rem);
'outer:
loop {
unsafe {
let mut head = self.head.with(|head| *head);
let mut next_ptr = head.as_ref().next.load(Acquire);
let stub = self.stub();
if head == stub {
debug!(" + pop; head == stub");
let next = match NonNull::new(next_ptr) {
Some(next) => next,
None => {
// This loop is not part of the standard intrusive mpsc
// channel algorithm. This is where we atomically pop
// the last task and add `rem` to the remaining capacity.
//
// This modification to the pop algorithm works because,
// at this point, we have not done any work (only done
// reading). We have a *pretty* good idea that there is
// no concurrent pusher.
//
// The capacity is then atomically added by doing an
// AcqRel CAS on `state`. The `state` cell is the
// linchpin of the algorithm.
//
// By successfully CASing `state` w/ AcqRel, we ensure
// that, if any thread was racing and entered a push, we
// see that and abort pop, retrying as it is
// "inconsistent".
let mut curr = SemState::load(&self.state, Acquire);
loop {
if curr.has_waiter(&self.stub) {
// Inconsistent: a waiter is the tail, but the stub's
// `next` link has not been written yet.
debug!(" + pop; inconsistent 1");
yield_now();
continue 'outer;
}
let mut next = curr;
next.release_permits(rem, &self.stub);
match next.compare_exchange(&self.state, curr, AcqRel, Acquire) {
Ok(_) => return None,
Err(actual) => {
curr = actual;
}
}
}
}
};
debug!(" + pop; got next waiter");
// Skip over the stub; the node after it becomes the head.
self.head.with_mut(|head| *head = next);
head = next;
next_ptr = next.as_ref().next.load(Acquire);
}
// `head` is a real waiter node. If it has a successor, unlink it and
// hand it to the caller.
if let Some(next) = NonNull::new(next_ptr) {
self.head.with_mut(|head| *head = next);
return Some(Arc::from_raw(head.as_ptr()));
}
let state = SemState::load(&self.state, Acquire);
// This must always be a pointer as the wait list is not empty.
let tail = state.waiter().unwrap();
if tail != head {
// Inconsistent: another node became the tail, but `head.next` is
// not linked yet.
debug!(" + pop; inconsistent 2");
yield_now();
continue 'outer;
}
// `head` is the last node. Re-insert the stub behind it so that `head`
// gets a successor and can be removed.
self.push_stub();
next_ptr = head.as_ref().next.load(Acquire);
if let Some(next) = NonNull::new(next_ptr) {
self.head.with_mut(|head| *head = next);
return Some(Arc::from_raw(head.as_ptr()));
}
// Inconsistent state, loop
debug!(" + pop; inconsistent 3");
yield_now();
}
}
}
/// Pushes the stub node onto the tail of the wait queue.
///
/// # Safety
///
/// Dereferences the stub pointer and the previous tail pointer read from
/// `state`. The state must hold a waiter pointer (not a permit count) when
/// this is called; otherwise the `unwrap` below panics.
///
/// NOTE(review): the only caller is `pop`, which presumably runs under the
/// `rx_lock` taken in `release_one` -- confirm before adding other callers.
unsafe fn push_stub(&self) {
let stub = self.stub();
// Set the next pointer. This does not require an atomic operation as
// this node is not accessible. The write will be flushed with the next
// operation
stub.as_ref().next.store(ptr::null_mut(), Relaxed);
// Update the tail to point to the new node. We need to see the previous
// node in order to update the next pointer as well as release the stub
// to any other threads calling `poll_permit`.
let prev = SemState::new_ptr(stub)
.swap(&self.state, AcqRel);
// The stub is only pushed when there are pending tasks. Because of
// this, the state must *always* be in pointer mode.
let prev = prev.waiter().unwrap();
// We don't want the *existing* pointer to be a stub.
debug_assert_ne!(prev, stub);
// Link the previous tail to the stub, publishing it to the consumer end.
prev.as_ref().next.store(stub.as_ptr(), Release);
}
/// Returns a pointer to the semaphore's stub node.
fn stub(&self) -> NonNull<WaiterNode> {
    let raw: *const WaiterNode = &*self.stub;

    // SAFETY: `raw` comes from a reference into the live `Box`, so it is
    // never null.
    unsafe { NonNull::new_unchecked(raw as *mut WaiterNode) }
}
}
// ===== impl Waiter =====
impl Waiter {
pub fn new() -> Waiter {
Waiter(None)
}
pub fn acquire(&self) -> bool {
self.0.as_ref()
.map(|node| node.acquire())
.unwrap_or(false)
}
}
// ===== impl WaiterNode =====
impl WaiterNode {
    /// Creates an idle node that is not linked into any queue.
    fn new() -> WaiterNode {
        let state = AtomicUsize::new(NodeState::new().to_usize());

        WaiterNode {
            state,
            task: AtomicTask::new(),
            next: AtomicPtr::new(ptr::null_mut()),
        }
    }

    /// Consumes an assigned permit.
    ///
    /// Returns `true` if the node was in the `Assigned` state; the node is
    /// moved back to `Idle` in that case.
    fn acquire(&self) -> bool {
        // `NodeState::compare_exchange` stores the receiver (`Idle`) if the
        // cell currently holds the `prev` argument (`Assigned`).
        NodeState::Idle
            .compare_exchange(&self.state, NodeState::Assigned, AcqRel, Acquire)
            .is_ok()
    }

    /// Registers the current task to be notified when a permit is assigned.
    fn register(&self) {
        self.task.register()
    }

    /// Transition the state to `QueuedWaiting`.
    ///
    /// This step can only happen from `Queued` or from `Idle`.
    ///
    /// Returns `true` if the node was not queued before, which means the
    /// caller still has to push it onto the wait queue. Returns `false` if the
    /// node was already in the queue.
    fn to_queued_waiting(&self) -> bool {
        use self::NodeState::*;

        let mut observed = NodeState::load(&self.state, Acquire);

        loop {
            debug_assert!(observed == Idle || observed == Queued, "actual = {:?}", observed);

            if let Err(actual) =
                QueuedWaiting.compare_exchange(&self.state, observed, AcqRel, Acquire)
            {
                // Somebody else changed the state; retry with what we saw.
                observed = actual;
                continue;
            }

            if observed.is_queued() {
                return false;
            }

            // Transitioned to queued, reset next pointer
            self.next.store(ptr::null_mut(), Relaxed);
            return true;
        }
    }

    /// Notify the waiter
    ///
    /// Called after the node has been popped from the queue. A node that is
    /// still waiting is assigned the permit and its task is woken; a node
    /// whose waiter canceled is simply returned to `Idle`.
    ///
    /// Returns `true` if the waiter accepts the notification
    ///
    /// # Panics
    ///
    /// Panics if the node is observed in a state other than `Queued` or
    /// `QueuedWaiting`.
    fn notify(&self) -> bool {
        use self::NodeState::*;

        // Optimistically assume the common `QueuedWaiting` state.
        let mut expected = QueuedWaiting;

        loop {
            let target = match expected {
                Queued => Idle,
                QueuedWaiting => Assigned,
                actual => panic!("actual = {:?}", actual),
            };

            match target.compare_exchange(&self.state, expected, AcqRel, Acquire) {
                Err(actual) => expected = actual,
                Ok(_) => {
                    if expected == QueuedWaiting {
                        debug!(" + notify -- task notified");
                        self.task.notify();
                        return true;
                    }

                    debug!(" + notify -- not notified; state = {:?}", expected);
                    return false;
                }
            }
        }
    }

    /// Forces the node back into the `Idle` state.
    fn revert_to_idle(&self) {
        // There are no other handles to the node
        NodeState::store(&self.state, NodeState::Idle, Relaxed);
    }

    /// Converts an `Arc` into a raw, non-null node pointer.
    ///
    /// The strong reference held by `arc` is not released; it has to be
    /// reclaimed later with `Arc::from_raw`.
    fn into_non_null(arc: Arc<WaiterNode>) -> NonNull<WaiterNode> {
        let raw = Arc::into_raw(arc) as *mut WaiterNode;

        // SAFETY: `Arc::into_raw` never returns a null pointer.
        unsafe { NonNull::new_unchecked(raw) }
    }
}
// ===== impl State =====
/// Flag differentiating between available permits and waiter pointers.
///
/// If we assume pointers are properly aligned, then the least significant bit
/// will always be zero. So, we use that bit to track if the value represents a
/// number: when the bit is set the state holds a permit count, when it is
/// clear the state holds a waiter pointer.
const NUM_FLAG: usize = 0b1;
/// When representing "numbers", the state has to be shifted this much (to get
/// rid of the flag bit).
const NUM_SHIFT: usize = 1;
impl SemState {
/// Returns a new default `State` value.
fn new(permits: usize) -> SemState {
SemState((permits << NUM_SHIFT) | NUM_FLAG)
}
/// Returns a `State` tracking `ptr` as the tail of the queue.
fn new_ptr(tail: NonNull<WaiterNode>) -> SemState {
SemState(tail.as_ptr() as usize)
}
/// Returns the amount of remaining capacity
fn available_permits(&self) -> usize {
if !self.has_available_permits() {
return 0;
}
self.0 >> NUM_SHIFT
}
/// Returns true if the state has permits that can be claimed by a waiter.
fn has_available_permits(&self) -> bool {
self.0 & NUM_FLAG == NUM_FLAG
}
fn has_waiter(&self, stub: &WaiterNode) -> bool {
!self.has_available_permits() && !self.is_stub(stub)
}
/// Try to acquire a permit
///
/// # Return
///
/// Returns `true` if the permit was acquired, `false` otherwise. If `false`
/// is returned, it can be assumed that `State` represents the head pointer
/// in the mpsc channel.
fn acquire_permit(&mut self, stub: &WaiterNode) -> bool {
if !self.has_available_permits() {
return false;
}
debug_assert!(self.0 != 1);
debug_assert!(self.waiter().is_none());
self.0 -= 1 << NUM_SHIFT;
if self.0 == NUM_FLAG {
// Set the state to the stub pointer.
self.0 = stub as *const _ as usize;
}
true
}
/// Release permits
///
/// Returns `true` if the permits were accepted.
fn release_permits(&mut self, permits: usize, stub: &WaiterNode) {
debug_assert!(permits > 0);
if self.is_stub(stub) {
self.0 = (permits << NUM_SHIFT) | NUM_FLAG;
return;
}
debug_assert!(self.has_available_permits());
self.0 += permits << NUM_SHIFT;
}
fn is_waiter(&self) -> bool {
self.0 & NUM_FLAG == 0
}
/// Returns the waiter, if one is set.
fn waiter(&self) -> Option<NonNull<WaiterNode>> {
if self.is_waiter() {
let waiter = NonNull::new(self.0 as *mut WaiterNode)
.expect("null pointer stored");
Some(waiter)
} else {
None
}
}
/// Set to a pointer to a waiter.
///
/// This can only be done from the full state.
fn set_waiter(&mut self, waiter: NonNull<WaiterNode>) {
let waiter = waiter.as_ptr() as usize;
debug_assert!(waiter & NUM_FLAG == 0);
self.0 = waiter;
}
fn is_stub(&self, stub: &WaiterNode) -> bool {
self.0 == stub as *const _ as usize
}
/// Load the state from an AtomicUsize.
fn load(cell: &AtomicUsize, ordering: Ordering) -> SemState {
let value = cell.load(ordering);
debug!(" + SemState::load; value = {}", value);
SemState(value)
}
/// Swap the values
fn swap(&self, cell: &AtomicUsize, ordering: Ordering) -> SemState {
SemState(cell.swap(self.to_usize(), ordering))
}
/// Compare and exchange the current value into the provided cell
fn compare_exchange(&self,
cell: &AtomicUsize,
prev: SemState,
success: Ordering,
failure: Ordering)
-> Result<SemState, SemState>
{
let res = cell.compare_exchange(prev.to_usize(), self.to_usize(), success, failure);
debug!(" + SemState::compare_exchange; prev = {}; next = {}; result = {:?}",
prev.to_usize(), self.to_usize(), res);
res.map(SemState)
.map_err(SemState)
}
/// Converts the state into a `usize` representation.
fn to_usize(&self) -> usize {
self.0
}
}
/// Prints either the permit count or a `<waiter>` placeholder, depending on
/// which mode the state is in. The raw pointer value is never printed.
impl fmt::Debug for SemState {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let mut builder = fmt.debug_struct("SemState");

        match self.is_waiter() {
            true => builder.field("state", &"<waiter>"),
            false => builder.field("permits", &self.available_permits()),
        };

        builder.finish()
    }
}
// ===== impl NodeState =====
impl NodeState {
    /// Returns the initial node state (`Idle`).
    fn new() -> NodeState {
        NodeState::Idle
    }

    /// Decodes a state previously produced by `to_usize`.
    ///
    /// # Panics
    ///
    /// Panics if `value` is not the discriminant of any `NodeState` variant,
    /// which indicates a corrupted state cell.
    fn from_usize(value: usize) -> NodeState {
        use self::NodeState::*;

        match value {
            0 => Idle,
            1 => Queued,
            2 => QueuedWaiting,
            3 => Assigned,
            // Include the offending value so a corrupted cell can be diagnosed.
            _ => panic!("invalid NodeState value: {}", value),
        }
    }

    /// Loads and decodes the state stored in `cell`.
    fn load(cell: &AtomicUsize, ordering: Ordering) -> NodeState {
        NodeState::from_usize(cell.load(ordering))
    }

    /// Store a value
    fn store(cell: &AtomicUsize, value: NodeState, ordering: Ordering) {
        cell.store(value.to_usize(), ordering);
    }

    /// Stores `self` into `cell` if the cell currently holds `prev`.
    ///
    /// Note the argument roles: the receiver is the *new* value and `prev` is
    /// the *expected* one. Returns the previous state on success and the
    /// actual state on failure.
    fn compare_exchange(&self,
                        cell: &AtomicUsize,
                        prev: NodeState,
                        success: Ordering,
                        failure: Ordering)
        -> Result<NodeState, NodeState>
    {
        cell.compare_exchange(prev.to_usize(), self.to_usize(), success, failure)
            .map(NodeState::from_usize)
            .map_err(NodeState::from_usize)
    }

    /// Returns `true` if `self` represents a queued state.
    pub fn is_queued(&self) -> bool {
        use self::NodeState::*;

        // Exhaustive on purpose: adding a variant must force a decision here.
        match *self {
            Queued | QueuedWaiting => true,
            Idle | Assigned => false,
        }
    }

    /// Converts the state into its `usize` discriminant.
    fn to_usize(&self) -> usize {
        *self as usize
    }
}