/* **********************************************************
* Copyright (c) 2011-2022 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2000-2001 Hewlett-Packard Company */
/*
* fragment.c - fragment related routines
*/
#include "globals.h"
#include "link.h"
#include "fragment.h"
#include "fcache.h"
#include "emit.h"
#include "monitor.h"
#include "instrument.h"
#include <stddef.h> /* for offsetof */
#include <limits.h> /* UINT_MAX */
#include "perscache.h"
#include "synch.h"
#ifdef UNIX
# include "nudge.h"
#endif
/* FIXME: make these runtime parameters */
#define INIT_HTABLE_SIZE_SHARED_BB (DYNAMO_OPTION(coarse_units) ? 5 : 10)
#define INIT_HTABLE_SIZE_SHARED_TRACE 10
/* the only private bbs will be selfmod, so start small */
#define INIT_HTABLE_SIZE_BB (DYNAMO_OPTION(shared_bbs) ? 5 : 10)
/* coarse-grain fragments do not use futures */
#define INIT_HTABLE_SIZE_SHARED_FUTURE (DYNAMO_OPTION(coarse_units) ? 5 : 10)
#ifdef RETURN_AFTER_CALL
/* we have small per-module hashtables */
# define INIT_HTABLE_SIZE_AFTER_CALL 5
#endif
/* private futures are only used when we have private fragments */
#define INIT_HTABLE_SIZE_FUTURE \
((DYNAMO_OPTION(shared_bbs) && DYNAMO_OPTION(shared_traces)) ? 5 : 9)
/* per-module htables */
#define INIT_HTABLE_SIZE_COARSE 5
#define INIT_HTABLE_SIZE_COARSE_TH 4
#ifdef RCT_IND_BRANCH
# include "rct.h"
/* we have small per-module hashtables */
# define INIT_HTABLE_SIZE_RCT_IBT 7
# ifndef RETURN_AFTER_CALL
# error RCT_IND_BRANCH requires RETURN_AFTER_CALL since it reuses data types
# endif
#endif
/* if shared traces, we currently have no private traces so make table tiny
 * FIXME: should start out w/ no table at all
*/
#define INIT_HTABLE_SIZE_TRACE (DYNAMO_OPTION(shared_traces) ? 6 : 9)
/* for small table sizes resize is not an expensive operation and we start smaller */
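/* N.B.: the INIT_HTABLE_SIZE_* values above are hash-bit counts rather than
 * entry counts (cf. the "bits" parameter of hashtable_ibl_myinit() and the
 * HASHTABLE_SIZE() uses below), so a value of 10 corresponds to 2^10 = 1024
 * initial slots.
 */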
/* Current flusher, protected by thread_initexit_lock. */
DECLARE_FREQPROT_VAR(static dcontext_t *flusher, NULL);
/* Current allsynch-flusher, protected by thread_initexit_lock. */
DECLARE_FREQPROT_VAR(static dcontext_t *allsynch_flusher, NULL);
/* Current flush base and size, protected by thread_initexit_lock. */
DECLARE_FREQPROT_VAR(static app_pc flush_base, NULL);
DECLARE_FREQPROT_VAR(static size_t flush_size, 0);
/* These global tables are kept on the heap for selfprot (case 7957) */
/* synchronization to these tables is accomplished via read-write locks,
* where the writers are removal and resizing -- addition is atomic to
* readers.
* for now none of these are read from ibl routines so we only have to
* synch with other DR routines
*/
static fragment_table_t *shared_bb;
static fragment_table_t *shared_trace;
/* if we have either shared bbs or shared traces we need this shared: */
static fragment_table_t *shared_future;
/* Thread-shared tables are allocated in a shared per_thread_t.
* The structure is also used if we're dumping shared traces.
* Kept on the heap for selfprot (case 7957)
*/
static per_thread_t *shared_pt;
#define USE_SHARED_PT() \
(SHARED_IBT_TABLES_ENABLED() || (TRACEDUMP_ENABLED() && DYNAMO_OPTION(shared_traces)))
/* We keep track of "old" IBT target tables in a linked list and
* deallocate them in fragment_exit(). */
/* FIXME Deallocate tables more aggressively using a distributed, refcounting
* algo as is used for shared deletion. */
typedef struct _dead_fragment_table_t {
fragment_entry_t *table_unaligned;
uint table_flags;
uint capacity;
uint ref_count;
struct _dead_fragment_table_t *next;
} dead_fragment_table_t;
/* We keep these list pointers on the heap for selfprot (case 8074). */
typedef struct _dead_table_lists_t {
dead_fragment_table_t *dead_tables;
dead_fragment_table_t *dead_tables_tail;
} dead_table_lists_t;
static dead_table_lists_t *dead_lists;
DECLARE_CXTSWPROT_VAR(static mutex_t dead_tables_lock, INIT_LOCK_FREE(dead_tables_lock));
#ifdef RETURN_AFTER_CALL
/* High level lock for an atomic lookup+add operation on the
* after call tables. */
DECLARE_CXTSWPROT_VAR(static mutex_t after_call_lock, INIT_LOCK_FREE(after_call_lock));
/* We use per-module tables and only need this table for non-module code;
* on Linux though this is the only table used, until we have a module list.
*/
static rct_module_table_t rac_non_module_table;
#endif
/* allows independent sequences of flushes and delayed deletions,
* though with -syscalls_synch_flush additions we now hold this
* throughout a flush.
*/
DECLARE_CXTSWPROT_VAR(mutex_t shared_cache_flush_lock,
INIT_LOCK_FREE(shared_cache_flush_lock));
/* Global count of flushes, used as a timestamp for shared deletion.
* Reads may be done w/o a lock, but writes can only be done
* via increment_global_flushtime() while holding shared_cache_flush_lock.
*/
DECLARE_FREQPROT_VAR(uint flushtime_global, 0);
DECLARE_CXTSWPROT_VAR(mutex_t client_flush_request_lock,
INIT_LOCK_FREE(client_flush_request_lock));
DECLARE_CXTSWPROT_VAR(client_flush_req_t *client_flush_requests, NULL);
#if defined(RCT_IND_BRANCH) && defined(UNIX)
/* On Win32 we use per-module tables; on Linux we use a single global table,
* until we have a module list.
*/
rct_module_table_t rct_global_table;
#endif
#define NULL_TAG ((app_pc)PTR_UINT_0)
/* FAKE_TAG is used as a deletion marker for unlinked entries */
#define FAKE_TAG ((app_pc)PTR_UINT_MINUS_1)
/* instead of an empty hashtable slot containing NULL, we fill it
* with a pointer to this constant fragment, which we give a tag
* of 0.
* PR 305731: rather than having a start_pc of 0, which causes
* an app targeting 0 to crash at 0, we point at a handler that
* sends the app to an ibl miss.
*/
byte *hashlookup_null_target;
#define HASHLOOKUP_NULL_START_PC ((cache_pc)hashlookup_null_handler)
static const fragment_t null_fragment = {
NULL_TAG, 0, 0, 0, 0, HASHLOOKUP_NULL_START_PC,
};
/* to avoid range check on fast path using an end of table sentinel fragment */
static const fragment_t sentinel_fragment = {
NULL_TAG, 0, 0, 0, 0, HASHLOOKUP_SENTINEL_START_PC,
};
/* Shared fragment IBTs: We need to preserve the open addressing traversal
* in the hashtable while marking a table entry as unlinked.
* A null_fragment won't work since it terminates the traversal,
* so we use an unlinked marker. The lookup table entry for
* an unlinked entry *always* has its start_pc_fragment set to
* an IBL target_delete entry.
*/
static const fragment_t unlinked_fragment = {
FAKE_TAG,
};
/* macro used in code dating from the time of the deletion markers */
/* Shared fragment IBTs: unlinked_fragment isn't a real fragment either. So they
* are naturally deleted during a table resize. */
#define REAL_FRAGMENT(fragment) \
((fragment) != &null_fragment && (fragment) != &unlinked_fragment && \
(fragment) != &sentinel_fragment)
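/* I.e., the slot holds an actual fragment rather than one of the three
 * placeholder values: empty (null_fragment), end-of-table sentinel
 * (sentinel_fragment), or unlinked/deletion marker (unlinked_fragment).
 */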
#define GET_PT(dc) \
((dc) == GLOBAL_DCONTEXT ? (USE_SHARED_PT() ? shared_pt : NULL) \
: (per_thread_t *)(dc)->fragment_field)
#define TABLE_PROTECTED(ptable) \
(!TABLE_NEEDS_LOCK(ptable) || READWRITE_LOCK_HELD(&(ptable)->rwlock))
/* everything except the invisible table is in here */
#define GET_FTABLE_HELPER(pt, flags, otherwise) \
(TEST(FRAG_IS_TRACE, (flags)) \
? (TEST(FRAG_SHARED, (flags)) ? shared_trace : &pt->trace) \
: (TEST(FRAG_SHARED, (flags)) \
? (TEST(FRAG_IS_FUTURE, (flags)) ? shared_future : shared_bb) \
: (TEST(FRAG_IS_FUTURE, (flags)) ? &pt->future : (otherwise))))
#define GET_FTABLE(pt, flags) GET_FTABLE_HELPER(pt, (flags), &pt->bb)
/* indirect branch table per target type (bb vs trace) and indirect branch type */
#define GET_IBT_TABLE(pt, flags, branch_type) \
(TEST(FRAG_IS_TRACE, (flags)) \
? (DYNAMO_OPTION(shared_trace_ibt_tables) \
? &shared_pt->trace_ibt[(branch_type)] \
: &(pt)->trace_ibt[(branch_type)]) \
: (DYNAMO_OPTION(shared_bb_ibt_tables) ? &shared_pt->bb_ibt[(branch_type)] \
: &(pt)->bb_ibt[(branch_type)]))
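/* Illustrative use (a sketch, not from the original source), picking the
 * return-branch IBT table for a trace fragment:
 *   ibl_table_t *t = GET_IBT_TABLE(pt, f->flags, IBL_RETURN);
 * With -shared_trace_ibt_tables this resolves to
 * &shared_pt->trace_ibt[IBL_RETURN]; otherwise it is the per-thread
 * pt->trace_ibt[IBL_RETURN].
 */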
/********************************** STATICS ***********************************/
static uint
fragment_heap_size(uint flags, int direct_exits, int indirect_exits);
static void
fragment_free_future(dcontext_t *dcontext, future_fragment_t *fut);
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
static void
coarse_persisted_fill_ibl(dcontext_t *dcontext, coarse_info_t *info,
ibl_branch_type_t branch_type);
#endif
static void
process_client_flush_requests(dcontext_t *dcontext, dcontext_t *alloc_dcontext,
client_flush_req_t *req, bool flush);
/* trace logging and synch for shared trace file: */
DECLARE_CXTSWPROT_VAR(static mutex_t tracedump_mutex, INIT_LOCK_FREE(tracedump_mutex));
DECLARE_FREQPROT_VAR(static stats_int_t tcount, 0); /* protected by tracedump_mutex */
static void
exit_trace_file(per_thread_t *pt);
static void
output_trace(dcontext_t *dcontext, per_thread_t *pt, fragment_t *f,
stats_int_t deleted_at);
static void
init_trace_file(per_thread_t *pt);
#define SHOULD_OUTPUT_FRAGMENT(flags) \
(TEST(FRAG_IS_TRACE, (flags)) && !TEST(FRAG_TRACE_OUTPUT, (flags)) && \
TRACEDUMP_ENABLED())
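/* FRAG_TRACE_OUTPUT presumably marks traces that have already been dumped,
 * so each trace is written to the trace file at most once.
 */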
#define FRAGMENT_COARSE_WRAPPER_FLAGS \
FRAG_FAKE | FRAG_SHARED | FRAG_COARSE_GRAIN | FRAG_LINKED_OUTGOING | \
FRAG_LINKED_INCOMING
/* We use temporary fragment_t + linkstub_t structs to more easily
* use existing code when emitting coarse-grain fragments.
 * Only bbs with a single indirect exit, or with one or two direct exits, can be coarse-grain.
* The bb_building_lock protects use of this.
*/
DECLARE_FREQPROT_VAR(
static struct {
fragment_t f;
union {
struct {
direct_linkstub_t dir_exit_1;
direct_linkstub_t dir_exit_2;
} dir_exits;
indirect_linkstub_t ind_exit;
} exits;
} coarse_emit_fragment,
{ { 0 } });
#ifdef SHARING_STUDY
/***************************************************************************
* fragment_t sharing study
* Only used with -fragment_sharing_study
* When the option is off we go ahead and waste the 4 static vars
* below so we don't have to have a define and separate build.
*/
typedef struct _thread_list_t {
uint thread_num;
uint count;
struct _thread_list_t *next;
} thread_list_t;
typedef struct _shared_entry_t {
app_pc tag;
uint num_threads;
thread_list_t *threads;
uint heap_size;
uint cache_size;
struct _shared_entry_t *next;
} shared_entry_t;
# define SHARED_HASH_BITS 16
static shared_entry_t **shared_blocks;
DECLARE_CXTSWPROT_VAR(static mutex_t shared_blocks_lock,
INIT_LOCK_FREE(shared_blocks_lock));
static shared_entry_t **shared_traces;
DECLARE_CXTSWPROT_VAR(static mutex_t shared_traces_lock,
INIT_LOCK_FREE(shared_traces_lock));
/* assumes caller holds table's lock! */
static shared_entry_t *
shared_block_lookup(shared_entry_t **table, fragment_t *f)
{
shared_entry_t *e;
uint hindex;
hindex = HASH_FUNC_BITS((ptr_uint_t)f->tag, SHARED_HASH_BITS);
/* using collision chains */
for (e = table[hindex]; e != NULL; e = e->next) {
if (e->tag == f->tag) {
return e;
}
}
return NULL;
}
static void
reset_shared_block_table(shared_entry_t **table, mutex_t *lock)
{
shared_entry_t *e, *nxte;
uint i;
uint size = HASHTABLE_SIZE(SHARED_HASH_BITS);
d_r_mutex_lock(lock);
for (i = 0; i < size; i++) {
for (e = table[i]; e != NULL; e = nxte) {
thread_list_t *tl = e->threads;
thread_list_t *tlnxt;
nxte = e->next;
while (tl != NULL) {
tlnxt = tl->next;
global_heap_free(tl, sizeof(thread_list_t) HEAPACCT(ACCT_OTHER));
tl = tlnxt;
}
global_heap_free(e, sizeof(shared_entry_t) HEAPACCT(ACCT_OTHER));
}
}
global_heap_free(table, size * sizeof(shared_entry_t *) HEAPACCT(ACCT_OTHER));
d_r_mutex_unlock(lock);
}
static void
add_shared_block(shared_entry_t **table, mutex_t *lock, fragment_t *f)
{
shared_entry_t *e;
uint hindex;
int num_direct = 0, num_indirect = 0;
linkstub_t *l = FRAGMENT_EXIT_STUBS(f);
/* use num to avoid thread_id_t recycling problems */
uint tnum = get_thread_num(d_r_get_thread_id());
d_r_mutex_lock(lock);
e = shared_block_lookup(table, f);
if (e != NULL) {
thread_list_t *tl = e->threads;
for (; tl != NULL; tl = tl->next) {
if (tl->thread_num == tnum) {
tl->count++;
LOG(GLOBAL, LOG_ALL, 2,
"add_shared_block: tag " PFX ", but re-add #%d for thread #%d\n",
e->tag, tl->count, tnum);
d_r_mutex_unlock(lock);
return;
}
}
tl = global_heap_alloc(sizeof(thread_list_t) HEAPACCT(ACCT_OTHER));
tl->thread_num = tnum;
tl->count = 1;
tl->next = e->threads;
e->threads = tl;
e->num_threads++;
LOG(GLOBAL, LOG_ALL, 2,
"add_shared_block: tag " PFX " thread #%d => %d threads\n", e->tag, tnum,
e->num_threads);
d_r_mutex_unlock(lock);
return;
}
/* get num stubs to find heap size */
for (; l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
if (LINKSTUB_DIRECT(l->flags))
num_direct++;
else {
ASSERT(LINKSTUB_INDIRECT(l->flags));
num_indirect++;
}
}
/* add entry to thread hashtable */
e = (shared_entry_t *)global_heap_alloc(sizeof(shared_entry_t) HEAPACCT(ACCT_OTHER));
e->tag = f->tag;
e->num_threads = 1;
e->heap_size = fragment_heap_size(f->flags, num_direct, num_indirect);
e->cache_size = (f->size + f->fcache_extra);
e->threads = global_heap_alloc(sizeof(thread_list_t) HEAPACCT(ACCT_OTHER));
e->threads->thread_num = tnum;
e->threads->count = 1;
e->threads->next = NULL;
LOG(GLOBAL, LOG_ALL, 2,
"add_shared_block: tag " PFX ", heap %d, cache %d, thread #%d\n", e->tag,
e->heap_size, e->cache_size, e->threads->thread_num);
hindex = HASH_FUNC_BITS((ptr_uint_t)f->tag, SHARED_HASH_BITS);
e->next = table[hindex];
table[hindex] = e;
d_r_mutex_unlock(lock);
}
static void
print_shared_table_stats(shared_entry_t **table, mutex_t *lock, const char *name)
{
uint i;
shared_entry_t *e;
uint size = HASHTABLE_SIZE(SHARED_HASH_BITS);
uint tot = 0, shared_tot = 0, shared = 0, heap = 0, cache = 0, creation_count = 0;
d_r_mutex_lock(lock);
for (i = 0; i < size; i++) {
for (e = table[i]; e != NULL; e = e->next) {
thread_list_t *tl = e->threads;
tot++;
shared_tot += e->num_threads;
for (; tl != NULL; tl = tl->next)
creation_count += tl->count;
if (e->num_threads > 1) {
shared++;
/* assume similar size for each thread -- cache padding
* only real difference
*/
heap += (e->heap_size * e->num_threads);
cache += (e->cache_size * e->num_threads);
}
}
}
d_r_mutex_unlock(lock);
LOG(GLOBAL, LOG_ALL, 1, "Shared %s statistics:\n", name);
LOG(GLOBAL, LOG_ALL, 1, "\ttotal blocks: %10d\n", tot);
LOG(GLOBAL, LOG_ALL, 1, "\tcreation count: %10d\n", creation_count);
LOG(GLOBAL, LOG_ALL, 1, "\tshared count: %10d\n", shared_tot);
LOG(GLOBAL, LOG_ALL, 1, "\tshared blocks: %10d\n", shared);
LOG(GLOBAL, LOG_ALL, 1, "\tshared heap: %10d\n", heap);
LOG(GLOBAL, LOG_ALL, 1, "\tshared cache: %10d\n", cache);
}
void
print_shared_stats()
{
print_shared_table_stats(shared_blocks, &shared_blocks_lock, "basic block");
print_shared_table_stats(shared_traces, &shared_traces_lock, "trace");
}
#endif /* SHARING_STUDY ***************************************************/
#ifdef FRAGMENT_SIZES_STUDY /*****************************************/
# include <math.h>
/* don't bother to synchronize these */
static int bb_sizes[200000];
static int trace_sizes[40000];
static int num_bb = 0;
static int num_traces = 0;
void
record_fragment_size(int size, bool is_trace)
{
if (is_trace) {
trace_sizes[num_traces] = size;
num_traces++;
ASSERT(num_traces < 40000);
} else {
bb_sizes[num_bb] = size;
num_bb++;
ASSERT(num_bb < 200000);
}
}
void
print_size_results()
{
LOG(GLOBAL, LOG_ALL, 1, "Basic block sizes (bytes):\n");
print_statistics(bb_sizes, num_bb);
LOG(GLOBAL, LOG_ALL, 1, "Trace sizes (bytes):\n");
print_statistics(trace_sizes, num_traces);
}
#endif /* FRAGMENT_SIZES_STUDY */ /*****************************************/
#define FRAGTABLE_WHICH_HEAP(flags) \
(TESTALL(FRAG_TABLE_INCLUSIVE_HIERARCHY | FRAG_TABLE_IBL_TARGETED, (flags)) \
? ACCT_IBLTABLE \
: ACCT_FRAG_TABLE)
#ifdef HASHTABLE_STATISTICS
# define UNPROT_STAT(stats) unprot_stats->stats
/* FIXME: either put in nonpersistent heap as appropriate, or
* preserve across resets
*/
# define ALLOC_UNPROT_STATS(dcontext, table) \
do { \
(table)->unprot_stats = HEAP_TYPE_ALLOC( \
(dcontext), unprot_ht_statistics_t, \
FRAGTABLE_WHICH_HEAP((table)->table_flags), UNPROTECTED); \
memset((table)->unprot_stats, 0, sizeof(unprot_ht_statistics_t)); \
} while (0)
# define DEALLOC_UNPROT_STATS(dcontext, table) \
HEAP_TYPE_FREE((dcontext), (table)->unprot_stats, unprot_ht_statistics_t, \
FRAGTABLE_WHICH_HEAP((table)->table_flags), UNPROTECTED)
# define CHECK_UNPROT_STATS(table) ASSERT(table.unprot_stats != NULL)
static void
check_stay_on_trace_stats_overflow(dcontext_t *dcontext, ibl_branch_type_t branch_type)
{
per_thread_t *pt = (per_thread_t *)dcontext->fragment_field;
hashtable_statistics_t *lookup_stats =
&pt->trace_ibt[branch_type].unprot_stats->trace_ibl_stats[branch_type];
if (lookup_stats->ib_stay_on_trace_stat < lookup_stats->ib_stay_on_trace_stat_last) {
lookup_stats->ib_stay_on_trace_stat_ovfl++;
}
lookup_stats->ib_stay_on_trace_stat_last = lookup_stats->ib_stay_on_trace_stat;
/* FIXME: ib_trace_last_ibl_exit should have an overflow check as well */
}
#endif /* HASHTABLE_STATISTICS */
/* init/update the tls slots storing this table's mask and lookup base
* N.B.: for thread-shared the caller must call for each thread
*/
/* currently we don't support a mixture */
static inline void
update_lookuptable_tls(dcontext_t *dcontext, ibl_table_t *table)
{
/* use dcontext->local_state, rather than get_local_state(), to support
* being called from other threads!
*/
local_state_extended_t *state = (local_state_extended_t *)dcontext->local_state;
ASSERT(state != NULL);
ASSERT(DYNAMO_OPTION(ibl_table_in_tls));
/* We must hold at least the read lock here, else we could grab
* an inconsistent mask/lookuptable pair if another thread is in the middle
* of resizing the table (case 10405).
*/
ASSERT_TABLE_SYNCHRONIZED(table, READWRITE);
/* case 10296: for shared tables we must update the table
* before the mask, as the ibl lookup code accesses the mask first,
* and old mask + new table is ok since it will de-ref within the
* new table (we never shrink tables) and be a miss, whereas
* new mask + old table can de-ref beyond the end of the table,
* crashing or worse.
*/
state->table_space.table[table->branch_type].lookuptable = table->table;
/* Perform a Store-Release, which when combined with a Load-Acquire of the mask
* in the IBL itself, ensures the prior store to lookuptable is always
* observed before this store to hash_mask on weakly ordered arches.
*/
ATOMIC_PTRSZ_ALIGNED_WRITE(&state->table_space.table[table->branch_type].hash_mask,
table->hash_mask, false);
}
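/* Summary of the ordering above (descriptive only):
 *   writer: store new lookuptable, then store-release new hash_mask;
 *   reader (IBL): load-acquire hash_mask, then load lookuptable.
 * A reader that sees the new mask therefore also sees the new (never
 * smaller) table; the benign reverse interleaving of old mask + new table
 * merely produces an in-bounds miss, per the case 10296 comment above.
 */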
#ifdef DEBUG
static const char *ibl_bb_table_type_names[IBL_BRANCH_TYPE_END] = { "ret_bb",
"indcall_bb",
"indjmp_bb" };
static const char *ibl_trace_table_type_names[IBL_BRANCH_TYPE_END] = { "ret_trace",
"indcall_trace",
"indjmp_trace" };
#endif
#ifdef DEBUG
static inline void
dump_lookuptable_tls(dcontext_t *dcontext)
{
/* use dcontext->local_state, rather than get_local_state(), to support
* being called from other threads!
*/
if (DYNAMO_OPTION(ibl_table_in_tls)) {
local_state_extended_t *state = (local_state_extended_t *)dcontext->local_state;
ibl_branch_type_t branch_type;
ASSERT(state != NULL);
for (branch_type = IBL_BRANCH_TYPE_START; branch_type < IBL_BRANCH_TYPE_END;
branch_type++) {
LOG(THREAD, LOG_FRAGMENT, 1, "\t Table %s, table " PFX ", mask " PFX "\n",
!SHARED_BB_ONLY_IB_TARGETS() ? ibl_trace_table_type_names[branch_type]
: ibl_bb_table_type_names[branch_type],
state->table_space.table[branch_type].lookuptable,
state->table_space.table[branch_type].hash_mask);
}
}
}
#endif
/*******************************************************************************
* IBL HASHTABLE INSTANTIATION
*/
#define FRAGENTRY_FROM_FRAGMENT(f) \
{ \
(f)->tag, PC_AS_JMP_TGT(FRAG_ISA_MODE(f->flags), (f)->start_pc) \
}
/* macros w/ name and types are duplicated in fragment.h -- keep in sync */
#define NAME_KEY ibl
#define ENTRY_TYPE fragment_entry_t
/* not defining HASHTABLE_USE_LOOKUPTABLE */
/* compiler won't let me use null_fragment.tag here */
static const fragment_entry_t fe_empty = { NULL_TAG, HASHLOOKUP_NULL_START_PC };
static const fragment_entry_t fe_sentinel = { NULL_TAG, HASHLOOKUP_SENTINEL_START_PC };
#define ENTRY_TAG(fe) ((ptr_uint_t)(fe).tag_fragment)
#define ENTRY_EMPTY (fe_empty)
#define ENTRY_SENTINEL (fe_sentinel)
#define IBL_ENTRY_IS_EMPTY(fe) \
((fe).tag_fragment == fe_empty.tag_fragment && \
(fe).start_pc_fragment == fe_empty.start_pc_fragment)
#define IBL_ENTRY_IS_INVALID(fe) ((fe).tag_fragment == FAKE_TAG)
#define IBL_ENTRY_IS_SENTINEL(fe) \
((fe).tag_fragment == fe_sentinel.tag_fragment && \
(fe).start_pc_fragment == fe_sentinel.start_pc_fragment)
#define ENTRY_IS_EMPTY(fe) IBL_ENTRY_IS_EMPTY(fe)
#define ENTRY_IS_SENTINEL(fe) IBL_ENTRY_IS_SENTINEL(fe)
#define ENTRY_IS_INVALID(fe) IBL_ENTRY_IS_INVALID(fe)
#define IBL_ENTRIES_ARE_EQUAL(fe1, fe2) ((fe1).tag_fragment == (fe2).tag_fragment)
#define ENTRIES_ARE_EQUAL(table, fe1, fe2) IBL_ENTRIES_ARE_EQUAL(fe1, fe2)
/* We set start_pc_fragment first to avoid races in a shared table where
* another thread matches the tag but then jumps to a bogus address. */
#define ENTRY_SET_TO_ENTRY(e, f) \
(e).start_pc_fragment = (f).start_pc_fragment; \
/* Ensure the start_pc_fragment store completes before the tag_fragment store: */ \
MEMORY_STORE_BARRIER(); \
(e).tag_fragment = (f).tag_fragment
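/* Reader-side counterpart (descriptive only): an IBL hit matches
 * tag_fragment first and then jumps through start_pc_fragment, so storing
 * start_pc_fragment before tag_fragment (with the barrier above) ensures a
 * matching tag is never paired with a stale or uninitialized target.
 */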
#define HASHTABLE_WHICH_HEAP(flags) FRAGTABLE_WHICH_HEAP(flags)
#define HTLOCK_RANK table_rwlock
#define HASHTABLE_ENTRY_STATS 1
#include "hashtablex.h"
/* all defines are undef-ed at end of hashtablex.h */
/* required routines for hashtable interface that we don't need for this instance */
static void
hashtable_ibl_free_entry(dcontext_t *dcontext, ibl_table_t *table, fragment_entry_t entry)
{
/* nothing to do, data is inlined */
}
/*******************************************************************************
* FRAGMENT HASHTABLE INSTANTIATION
*/
/* macros w/ name and types are duplicated in fragment.h -- keep in sync */
#define NAME_KEY fragment
#define ENTRY_TYPE fragment_t *
/* not defining HASHTABLE_USE_LOOKUPTABLE */
#define ENTRY_TAG(f) ((ptr_uint_t)(f)->tag)
/* instead of setting to 0, point at null_fragment */
#define ENTRY_EMPTY ((fragment_t *)&null_fragment)
#define ENTRY_SENTINEL ((fragment_t *)&sentinel_fragment)
#define ENTRY_IS_EMPTY(f) ((f) == (fragment_t *)&null_fragment)
#define ENTRY_IS_SENTINEL(f) ((f) == (fragment_t *)&sentinel_fragment)
#define ENTRY_IS_INVALID(f) ((f) == (fragment_t *)&unlinked_fragment)
#define ENTRIES_ARE_EQUAL(t, f, g) ((f) == (g))
#define HASHTABLE_WHICH_HEAP(flags) FRAGTABLE_WHICH_HEAP(flags)
#define HTLOCK_RANK table_rwlock
#include "hashtablex.h"
/* all defines are undef-ed at end of hashtablex.h */
static void
hashtable_fragment_resized_custom(dcontext_t *dcontext, fragment_table_t *table,
uint old_capacity, fragment_t **old_table,
fragment_t **old_table_unaligned, uint old_ref_count,
uint old_table_flags)
{
/* nothing */
}
static void
hashtable_fragment_init_internal_custom(dcontext_t *dcontext, fragment_table_t *table)
{
/* nothing */
}
#ifdef DEBUG
static void
hashtable_fragment_study_custom(dcontext_t *dcontext, fragment_table_t *table,
uint entries_inc /*amnt table->entries was pre-inced*/)
{
/* nothing */
}
#endif
/* callers should use either hashtable_ibl_preinit or hashtable_resize instead */
static void
hashtable_ibl_init_internal_custom(dcontext_t *dcontext, ibl_table_t *table)
{
ASSERT(null_fragment.tag == NULL_TAG);
ASSERT(null_fragment.start_pc == HASHLOOKUP_NULL_START_PC);
ASSERT(FAKE_TAG != NULL_TAG);
ASSERT(sentinel_fragment.tag == NULL_TAG);
ASSERT(sentinel_fragment.start_pc == HASHLOOKUP_SENTINEL_START_PC);
ASSERT(HASHLOOKUP_SENTINEL_START_PC != HASHLOOKUP_NULL_START_PC);
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));
ASSERT(TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags));
/* every time we resize a table we reset the flush threshold,
* since it is cleared in place after one flush
*/
table->groom_factor_percent = TEST(FRAG_TABLE_TRACE, table->table_flags)
? DYNAMO_OPTION(trace_ibt_groom)
: DYNAMO_OPTION(bb_ibt_groom);
table->max_capacity_bits = TEST(FRAG_TABLE_TRACE, table->table_flags)
? DYNAMO_OPTION(private_trace_ibl_targets_max)
: DYNAMO_OPTION(private_bb_ibl_targets_max);
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
if (table->unprot_stats == NULL) {
/* first time, not a resize */
ALLOC_UNPROT_STATS(dcontext, table);
} /* else, keep original */
}
#endif /* HASHTABLE_STATISTICS */
if (SHARED_IB_TARGETS() && !TEST(FRAG_TABLE_SHARED, table->table_flags)) {
/* currently we don't support a mixture */
ASSERT(TEST(FRAG_TABLE_TARGET_SHARED, table->table_flags));
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));
ASSERT(table->branch_type != IBL_NONE);
/* Only data for one set of tables is stored in TLS -- for the trace
* tables in the default config OR the BB tables in shared BBs
* only mode.
*/
if ((TEST(FRAG_TABLE_TRACE, table->table_flags) || SHARED_BB_ONLY_IB_TARGETS()) &&
DYNAMO_OPTION(ibl_table_in_tls))
update_lookuptable_tls(dcontext, table);
}
}
/* We need our own routines to init/free our added fields */
static void
hashtable_ibl_myinit(dcontext_t *dcontext, ibl_table_t *table, uint bits,
uint load_factor_percent, hash_function_t func, uint hash_offset,
ibl_branch_type_t branch_type, bool use_lookup,
uint table_flags _IF_DEBUG(const char *table_name))
{
uint flags = table_flags;
ASSERT(dcontext != GLOBAL_DCONTEXT || TEST(FRAG_TABLE_SHARED, flags));
/* flags shared by all ibl tables */
flags |= FRAG_TABLE_INCLUSIVE_HIERARCHY;
flags |= FRAG_TABLE_IBL_TARGETED;
flags |= HASHTABLE_ALIGN_TABLE;
/* use entry stats with all our ibl-targeted tables */
flags |= HASHTABLE_USE_ENTRY_STATS;
#ifdef HASHTABLE_STATISTICS
/* indicate this is first time, not a resize */
table->unprot_stats = NULL;
#endif
table->branch_type = branch_type;
hashtable_ibl_init(dcontext, table, bits, load_factor_percent, func, hash_offset,
flags _IF_DEBUG(table_name));
/* PR 305731: rather than having a start_pc of 0, which causes an
* app targeting 0 to crash at 0, we point at a handler that sends
* the app to an ibl miss via target_delete, which restores
* registers saved in the found path.
*/
if (dcontext != GLOBAL_DCONTEXT && hashlookup_null_target == NULL) {
ASSERT(!dynamo_initialized);
hashlookup_null_target =
PC_AS_JMP_TGT(DEFAULT_ISA_MODE, get_target_delete_entry_pc(dcontext, table));
#if !defined(X64) && defined(LINUX)
/* see comments in x86.asm: we patch to avoid text relocations */
byte *pc = (byte *)hashlookup_null_handler;
byte *page_start = (byte *)PAGE_START(pc);
byte *page_end = (byte *)ALIGN_FORWARD(
pc IF_ARM(+ARM_INSTR_SIZE) + JMP_LONG_LENGTH, PAGE_SIZE);
make_writable(page_start, page_end - page_start);
# ifdef X86
insert_relative_target(pc + 1, hashlookup_null_target, NOT_HOT_PATCHABLE);
# elif defined(ARM)
/* We use a pc-rel load w/ the data right after the load */
*(byte **)(pc + ARM_INSTR_SIZE) = hashlookup_null_target;
# endif
make_unwritable(page_start, page_end - page_start);
#endif
}
}
static void
hashtable_ibl_myfree(dcontext_t *dcontext, ibl_table_t *table)
{
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));
DEALLOC_UNPROT_STATS(dcontext, table);
}
#endif /* HASHTABLE_STATISTICS */
hashtable_ibl_free(dcontext, table);
}
static void
hashtable_fragment_free_entry(dcontext_t *dcontext, fragment_table_t *table,
fragment_t *f)
{
if (TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags)) {
ASSERT_NOT_REACHED(); /* case 7691 */
} else {
if (TEST(FRAG_IS_FUTURE, f->flags))
fragment_free_future(dcontext, (future_fragment_t *)f);
else
fragment_free(dcontext, f);
}
}
static inline bool
fragment_add_to_hashtable(dcontext_t *dcontext, fragment_t *e, fragment_table_t *table)
{
/* When using shared IBT tables w/trace building and BB2BB IBL, there is a
* race between adding a BB target to a table and having it marked by
* another thread as a trace head. The race exists because the two functions
* do not use a common lock.
* The race does NOT cause a correctness problem since a) the marking thread
* removes the trace head from the table and b) any subsequent add attempt
* is caught in add_ibl_target(). The table lock is used during add and
* remove operations and FRAG_IS_TRACE_HEAD is checked while holding
* the lock. So although a trace head may be present in a table temporarily --
* it's being marked while an add operation that has passed the frag flags
* check is in progress -- it will be subsequently removed by the marking
* thread.
* However, the existence of the race does mean that
* we cannot ASSERT(!(FRAG_IS_TRACE_HEAD,...)) at arbitrary spots along the
* add_ibl_target() path since such an assert could fire due to the race.
* What is likely a safe point to assert is when there is only a single
* thread in the process.
*/
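/* Illustrative interleaving of the benign race described above (sketch):
 *   thread A: passes the flags check in add_ibl_target() and begins the add;
 *   thread B: marks the fragment FRAG_IS_TRACE_HEAD;
 *   thread A: completes the add under the table lock, so a trace head is
 *             transiently present in the table;
 *   thread B: subsequently removes it from the table, also under the lock.
 */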
DOCHECK(1, {
if (TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags) &&
d_r_get_num_threads() == 1)
ASSERT(!TEST(FRAG_IS_TRACE_HEAD, e->flags));
});
return hashtable_fragment_add(dcontext, e, table);
}
/* updates all fragments in a given fragment table which may
* have IBL routine heads inlined in the indirect exit stubs
*
* FIXME: [perf] should add a filter of which branch types need updating if
* updating all is a noticeable performance hit.
*
 * FIXME: [perf] Also it may be better to traverse all fragments in an fcache
* unit instead of entries in a half-empty hashtable
*/
static void
update_indirect_exit_stubs_from_table(dcontext_t *dcontext, fragment_table_t *ftable)
{
fragment_t *f;
linkstub_t *l;
uint i;
for (i = 0; i < ftable->capacity; i++) {
f = ftable->table[i];
if (!REAL_FRAGMENT(f))
continue;
for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
if (LINKSTUB_INDIRECT(l->flags)) {
/* FIXME: should add a filter of which branch types need updating */
update_indirect_exit_stub(dcontext, f, l);
LOG(THREAD, LOG_FRAGMENT, 5,
"\tIBL target table resizing: updating F%d\n", f->id);
STATS_INC(num_ibl_stub_resize_updates);
}
}
}
}
static void
safely_nullify_tables(dcontext_t *dcontext, ibl_table_t *new_table,
fragment_entry_t *table, uint capacity)
{
uint i;
cache_pc target_delete =
PC_AS_JMP_TGT(DEFAULT_ISA_MODE, get_target_delete_entry_pc(dcontext, new_table));
ASSERT(target_delete != NULL);
ASSERT_TABLE_SYNCHRONIZED(new_table, WRITE);
for (i = 0; i < capacity; i++) {
if (IBL_ENTRY_IS_SENTINEL(table[i])) {
ASSERT(i == capacity - 1);
continue;
}
/* We need these writes to be atomic, so check that they're aligned. */
ASSERT(ALIGNED(&table[i].tag_fragment, sizeof(table[i].tag_fragment)));
ASSERT(ALIGNED(&table[i].start_pc_fragment, sizeof(table[i].start_pc_fragment)));
/* We cannot set the tag to fe_empty.tag_fragment to break the hash chain
* as the target_delete path relies on acquiring the tag from the table entry,
* so we leave it alone.
*/
/* We set the payload to target_delete to induce a cache exit.
*
* The target_delete path leads to a loss of information -- we can't
* tell what the src fragment was (the one that transitioned to the
* IBL code) and this in principle could weaken our RCT checks (see case
* 5085). In practical terms, RCT checks are unaffected since they
* are not employed on in-cache transitions such as an IBL hit.
* (All transitions to target_delete are a race along the hit path.)
* If we still want to preserve the src info, we can leave the payload
* as-is, possibly pointing to a cache address. The effect is that
* any thread accessing the old table on the IBL hit path will not exit
* the cache as early. (We should leave the fragment_t* value in the
* table untouched also so that the fragment_table_t is in a consistent
* state.)
*/
/* For weakly ordered arches: we leave this as a weak (atomic-untorn b/c it's
* aligned) store, which should eventually be seen by the target thread.
*/
table[i].start_pc_fragment = target_delete;
}
STATS_INC(num_shared_ibt_table_flushes);
}
/* Add an item to the dead tables list */
static inline void
add_to_dead_table_list(dcontext_t *alloc_dc, ibl_table_t *ftable, uint old_capacity,
fragment_entry_t *old_table_unaligned, uint old_ref_count,
uint old_table_flags)
{
dead_fragment_table_t *item = (dead_fragment_table_t *)heap_alloc(
GLOBAL_DCONTEXT, sizeof(dead_fragment_table_t) HEAPACCT(ACCT_IBLTABLE));
LOG(GLOBAL, LOG_FRAGMENT, 2, "add_to_dead_table_list %s " PFX " capacity %d\n",
ftable->name, old_table_unaligned, old_capacity);
ASSERT(old_ref_count >= 1); /* someone other than the caller must be holding a reference */
/* write lock must be held so that ref_count is copied accurately */
ASSERT_TABLE_SYNCHRONIZED(ftable, WRITE);
item->capacity = old_capacity;
item->table_unaligned = old_table_unaligned;
item->table_flags = old_table_flags;
item->ref_count = old_ref_count;
item->next = NULL;
/* Add to the end of list. We use a FIFO because generally we'll be
* decrementing ref-counts for older tables before we do so for
* younger tables. A FIFO will yield faster searches than, say, a
* stack.
*/
d_r_mutex_lock(&dead_tables_lock);
if (dead_lists->dead_tables == NULL) {
ASSERT(dead_lists->dead_tables_tail == NULL);
dead_lists->dead_tables = item;
} else {
ASSERT(dead_lists->dead_tables_tail != NULL);
ASSERT(dead_lists->dead_tables_tail->next == NULL);
dead_lists->dead_tables_tail->next = item;
}
dead_lists->dead_tables_tail = item;
d_r_mutex_unlock(&dead_tables_lock);
STATS_ADD_PEAK(num_dead_shared_ibt_tables, 1);
STATS_INC(num_total_dead_shared_ibt_tables);
}
/* forward decl */
static inline void
update_private_ptr_to_shared_ibt_table(dcontext_t *dcontext,
ibl_branch_type_t branch_type, bool trace,
bool adjust_old_ref_count, bool lock_table);
static void
hashtable_ibl_resized_custom(dcontext_t *dcontext, ibl_table_t *table, uint old_capacity,
fragment_entry_t *old_table,
fragment_entry_t *old_table_unaligned, uint old_ref_count,
uint old_table_flags)
{
dcontext_t *alloc_dc = FRAGMENT_TABLE_ALLOC_DC(dcontext, table->table_flags);
per_thread_t *pt = GET_PT(dcontext);
bool shared_ibt_table =
TESTALL(FRAG_TABLE_TARGET_SHARED | FRAG_TABLE_SHARED, table->table_flags);
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));