ultmigration.c
/*
* Copyright © 2017, Mathias Gottschlag
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#define _GNU_SOURCE
#include "ultmigration.h"
#include <assert.h>
#include <pthread.h>
#include <malloc.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sched.h>
#include <semaphore.h>
/* global initialization */
static int klt_count = 0;
static int initialized = 0;
pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
// MWAIT hint in EAX: bits 7:4 select the target C-state (0 requests C1).
static const uint32_t MWAIT_CSTATE = 0x00;
struct thread_pool_info;
/* current ULT */
struct current_thread_info {
struct thread_pool_info *pool_thread;
uintptr_t stack;
char aux_thread[4096];
sem_t exit_sem;
};
static __thread struct current_thread_info *current;
/* thread pool for ULT execution */
#define THREAD_POOL_SIZE 1
#define STOP_THREAD ((void*)(uintptr_t)(-1))
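/* Per-CPU pool thread. The 8-entry queue holds ULTs that have been scheduled
* to run on this thread; on x86-64, 8 pointers fill exactly the 64-byte cache
* line that the pool thread monitors, hence the aligned(64) attribute. */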
struct thread_pool_info {
struct current_thread_info *queue[8];
uintptr_t stack;
pthread_t thread;
int cpu;
} __attribute__((aligned(64)));
// We have ULT_TYPE_MAX types of threads. For each type, there is a pool of
// THREAD_POOL_SIZE threads.
static struct thread_pool_info pool[ULT_TYPE_MAX][THREAD_POOL_SIZE];
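/* Note: MONITOR/MWAIT can normally only be executed at privilege level 0, so
* calling them from user space assumes a setup (e.g. a modified kernel) that
* permits it. The instructions are emitted as raw opcode bytes below. */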
static inline void __monitor(const void *address)
{
/* "monitor %eax, %ecx, %edx;" */
__asm volatile(".byte 0x0f, 0x01, 0xc8;"
:: "a" (address), "c" (0), "d"(0));
}
static inline void __mwait(uint32_t cstate)
{
/* "mwait %eax, %ecx;" */
__asm volatile(".byte 0x0f, 0x01, 0xc9;"
:: "a" (cstate), "c" (0));
}
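/* Called by a pool thread to fetch the next ULT that was migrated to it. The
* queue slots are polled; if all are empty, MONITOR is armed on the queue's
* cache line, the slots are re-checked to close the race with a concurrent
* enqueue, and the thread then sleeps in MWAIT until the line is written. */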
struct current_thread_info *ult_pick_next_thread(struct thread_pool_info *pool_thread) {
struct current_thread_info *next = NULL;
int i;
/* poll until a ULT is scheduled to run on this thread */
while (1) {
mwait_retry:
/* check all the queue entries for a non-null entry */
for (i = 0; i < 8; i++) {
/*next = __atomic_exchange_n(&pool_thread->queue[i],
next,
__ATOMIC_SEQ_CST);*/
next = __atomic_load_n(&pool_thread->queue[i],
__ATOMIC_SEQ_CST);
if (next) {
/* no atomic exchange is needed: once a slot is
* non-NULL, this thread is its only writer */
__atomic_store_n(&pool_thread->queue[i],
NULL,
__ATOMIC_SEQ_CST);
return next;
}
}
/* if none was found, sleep until the cache line changes */
__monitor(pool_thread->queue);
for (i = 0; i < 8; i++) {
if (pool_thread->queue[i] != NULL) {
goto mwait_retry;
}
}
__mwait(MWAIT_CSTATE);
/* try again */
}
}
void ult_set_pool_thread_affinity(struct thread_pool_info *pool_thread) {
cpu_set_t cpus;
CPU_ZERO(&cpus);
CPU_SET(pool_thread->cpu, &cpus);
sched_setaffinity(0, sizeof(cpus), &cpus);
}
extern void *ult_pool_thread_entry(void *param);
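/* The pool CPUs are taken from the SLOW_CPU and FAST_CPU environment
* variables, each a comma-separated list of CPU ids (one per pool thread).
* Example invocation (hypothetical CPU numbers):
*
*   SLOW_CPU=0 FAST_CPU=4 ./application
*/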
static void ult_initialize(void) {
int i, t;
char *cpu_list, *end;
/* read the fast/slow CPU assignment from the environment */
char *slow_cpu = getenv("SLOW_CPU");
char *fast_cpu = getenv("FAST_CPU");
assert(slow_cpu != NULL && fast_cpu != NULL && "SLOW_CPU or FAST_CPU environment variable not set");
/* create a thread pool */
for (t = 0; t < ULT_TYPE_MAX; t++) {
switch (t) {
case ULT_FAST: cpu_list = fast_cpu; break;
case ULT_SLOW: cpu_list = slow_cpu; break;
}
for (i = 0; i < THREAD_POOL_SIZE; i++) {
// Read cpu_list as a comma-separated list of CPU ids.
pool[t][i].cpu = strtol(cpu_list, &end, 10);
assert(end != cpu_list && "SLOW_CPU/FAST_CPU contained invalid data");
cpu_list = end;
if (*cpu_list == ',') cpu_list++;
pthread_create(&pool[t][i].thread,
NULL,
ult_pool_thread_entry,
&pool[t][i]);
}
}
}
static void ult_uninitialize(void) {
/* this function is only called when the last ULT calls
* ult_unregister_klt, so we do not have to worry about any remaining
* ready-queue contents */
int i, t;
/* send the threads a message and wait for them to stop */
for (t = 0; t < ULT_TYPE_MAX; t++) {
for (i = 0; i < THREAD_POOL_SIZE; i++) {
__atomic_store_n(&pool[t][i].queue[0],
STOP_THREAD,
__ATOMIC_SEQ_CST);
}
}
for (t = 0; t < ULT_TYPE_MAX; t++) {
for (i = 0; i < THREAD_POOL_SIZE; i++) {
pthread_join(pool[t][i].thread, NULL);
}
}
}
void ult_register_asm(struct current_thread_info *thread,
struct thread_pool_info *first_klt);
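/* Register the calling kernel-level thread with the migration runtime. The
* caller continues to execute as a ULT on a pool thread, while the original
* kernel-level thread blocks until ult_unregister_klt() migrates the ULT
* back to it. */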
void ult_register_klt(void) {
pthread_mutex_lock(&init_mutex);
klt_count++;
if (!initialized) {
ult_initialize();
initialized = 1;
}
pthread_mutex_unlock(&init_mutex);
/* allocate a second stack for this kernel-level thread */
struct current_thread_info *thread =
malloc(sizeof(struct current_thread_info));
memset(thread, 0, sizeof(*thread));
sem_init(&thread->exit_sem, 0, 0);
/* store the pointer to the current thread in TLS so that it is always
* directly available via %fs */
current = thread;
/* migrate this user-level thread to the thread pool and let this
* kernel-level thread block until ult_unregister_klt migrates the ULT
* back */
ult_register_asm(current, &pool[0][0]);
}
void ult_wait_for_unregister(struct current_thread_info *thread) {
sem_wait(&thread->exit_sem);
}
void ult_signal_unregister(struct current_thread_info *thread) {
sem_post(&thread->exit_sem);
}
void ult_unregister_asm(struct current_thread_info *thread);
void ult_unregister_klt(void) {
if (current == NULL) {
return;
}
/* migrate the thread to its original kernel-level thread */
ult_unregister_asm(current);
pthread_mutex_lock(&init_mutex);
klt_count--;
if (initialized && klt_count == 0) {
ult_uninitialize();
initialized = 0;
}
pthread_mutex_unlock(&init_mutex);
}
void ult_migrate_asm(struct current_thread_info *ult,
struct thread_pool_info *next);
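/* Migrate the calling ULT to the thread pool of the requested type. This is
* a no-op if the caller never called ult_register_klt() or already runs on
* the target pool thread. */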
void ult_migrate(enum ult_thread_type type) {
assert(type >= 0 && type < ULT_TYPE_MAX);
if (current == NULL) {
return;
}
/* TODO: determine suitable threads in the thread pool for the phase */
// TODO: Use more than one thread?
struct thread_pool_info *next = &pool[type][0];
if (next == current->pool_thread) {
return;
}
ult_migrate_asm(current, next);
}
int ult_registered(void) {
return current != NULL;
}
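/* Minimal usage sketch (not part of this file): an application registers its
* kernel-level thread, migrates between the fast and slow pools around its
* execution phases, and unregisters before exiting. compute_phase() and
* io_phase() are hypothetical application functions; SLOW_CPU and FAST_CPU
* must be set in the environment (see ult_initialize above).
*
*   #include "ultmigration.h"
*
*   int main(void) {
*       ult_register_klt();       // continue as a ULT on a pool thread
*       ult_migrate(ULT_FAST);    // run the next phase on a fast CPU
*       compute_phase();
*       ult_migrate(ULT_SLOW);    // run the next phase on a slow CPU
*       io_phase();
*       ult_unregister_klt();     // migrate back; tears down the pool when
*                                 // the last registered thread unregisters
*       return 0;
*   }
*/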