-
Notifications
You must be signed in to change notification settings - Fork 1
/
arm_selector.ml
1277 lines (1166 loc) · 48.3 KB
/
arm_selector.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(***************************************************************************)
(* *)
(* Copyright (C) 2022/2023 The Charles Stark Draper Laboratory, Inc. *)
(* *)
(* This file is provided under the license found in the LICENSE file in *)
(* the top-level directory of this project. *)
(* *)
(* This research was developed with funding from the Defense Advanced *)
(* Research Projects Agency (DARPA). *)
(* *)
(***************************************************************************)
open !Core_kernel
open Bap.Std
open Bap_core_theory
open KB.Let
module Err = Kb_error
(*-------------------------------------------------------
*
* Instruction selection works in the "standard" maximal munch
* manner: we match against a portion of the AST, call the selector
* recursively and generate opcodes.
*
*
*
* The approach attempts to create a small DSL for opcodes and their
* operands, ultimately "smart" constructors for [Ir.t] blocks, and
* then use ordinary OCaml pattern matching, with DSL terms on the
* right hand side.
*
*
* since BIR constructs may be at a higher-level than ARM ones, and be
* nested, we need to flatten the terms, as well as disentangle the
* different kinds of operations. We roughly follow the Core Theory
* division of data and control _effects_, and _pure_ operations which
* simply create data, we do _not_ distinguish a memory and non-memory
* operations, since we need to track dependencies between sequential
* memory reads and writes (though the variable name for memory doesn't
* really matter in the end).
*
* This results in 2 types:
*
* - [arm_eff] which contains the data required for the control and data
* operations in the current block, as well as the other blocks
* involved in the computation.
*
* - [arm_pure], which contains a variable or constant which represents
* the value being computed, as well as the effects required to compute
* that value (in the form of an [arm_eff] field.
*
* Every value is assumed to fit in a "standard" 32 bit register, so
* anything involving memory, or "double" width words have to be handled
* explicitly. It's currently an issue that we don't track bit-widths
* explicitly, since it may result in errors down the line, if we assume
* widths of registers or operands incorrectly. It sure simplifies the
* code though.
*
*
* There's a bunch of "preassign" operations throughout the module, since
* this allows the front-end to use special variable names, which we then
* know will be correctly allocated to specific registers, e.g. PC or
* SP. A little care must be taken depending on whether we're generating
* ARM proper or Thumb. There are probably some land mines here, since
* some instructions do not allow some registers, and this is not necessarily
* enforced by our constraints.
*
* We also provide the set of "allowed" general purpose registers, to be
* used by the minizinc back end when doing selection.
*
* The general approach to emitting instructions for BIL expressions is
* the following:
* 1. Recursively emit instructions for the sub-terms.
* 2. Generate a fresh temp to store the result.
* 3. Select the appropriate opcode, and move the result into the
* generated temp.
* 4. Union the set of all effects that participated in building the
* result.
* 5. Return the resulting [arm_pure] record.
*
* The opcodes are roughly just the ARM assembly mnemonic strings,
* they're created in the specific [Ops] module. Actually building
* the opcodes with their arguments (the aforementioned DSL) is done in
* the [Arm_ops] module.
*
* The selection is done by a series of pattern matching code, namely the
* [select_FOO] functions, where [FOO] is one of [exp], [stmt] or [blk].
*
* The pretty printer takes the resulting Vibes IR block, and is
* relatively straightforward except for various idiosyncrasies of the
* ARM assembly syntax:
*
* Roughly the only real issue is adding square brackets at the
* appropriate places. For example, the [str] operation can support
* (among many others!) the syntaxes [str r0, [r1, #42]], [str r0, [r1]]
* etc.
*
* We do that by marking operands by one of four tags (of a type called
* [bracket]): [Open], [Close], [Neither] or [Both]. These tags mark
* whether to open a square bracket, close one, enclose the operand or
* neither. Roughly we just create a parallel list of the same length as operands.
*
* We do this in kind of a hacky way, but I'm not sure how to do this
* better.
*
* Pretty printing is also the time when we resolve offsets: we do the
* arithmetic required to make them correct relative the the original
* binary, even though they may be in the patch location. See
* [arm_operand_pretty] for details.
*
*
* Finally, we create a peephole optimization which, *after register
* allocation*, deletes all instructions of the form [mov ri ri] (or of
* the form [add ri #0]). This is the only point when this can be done,
* since before register allocation we do not know if the [mov] will be
* of that form.
*
* Sadly there is no way to have an explicit [nop] generated at the moment.
*
*
*
*
*---------------------------------------------------------*)
type arm_eff = {
(* These are the move/load/store operations in the current block *)
current_data : Ir.operation list;
(* These are the jump/goto operations in the current block *)
current_ctrl : Ir.operation list;
(* This is the list of blocks which encode the semantics of the
operation, except those contained in [current_data] and
[current_ctrl] *)
other_blks : Ir.t}
[@@deriving compare, equal, sexp]
let empty_eff = {current_data = []; current_ctrl = []; other_blks = Ir.empty}
type arm_pure = {op_val : Ir.operand; op_eff : arm_eff}
[@@deriving compare, equal, sexp]
(* The default type for memory words *)
let word_ty = Bil.Types.Imm 32
let bit_ty = Bil.Types.Imm 1
let mem_ty = Bil.Types.Mem (`r32, `r8)
let is_arm (lang : Theory.language) : bool KB.t =
if Theory.Language.is_unknown lang
then Err.(fail Unknown_encoding)
else
KB.return @@
String.is_substring ~substring:"arm" @@
Theory.Language.to_string lang
let is_thumb (lang : Theory.language) : bool KB.t =
if Theory.Language.is_unknown lang
then Err.(fail Unknown_encoding)
else
KB.return @@
String.is_substring ~substring:"thumb" @@
Theory.Language.to_string lang
let is_arm_or_thumb (lang : Theory.language) : bool KB.t =
let+ arm = is_arm lang and+ thumb = is_thumb lang in
arm || thumb
(* FIXME: this feels very redundant: we should just leave the
responsibility for this in ir.ml or minizinc.ml *)
let regs (tgt : Theory.target) (_ : Theory.language) =
let bap_regs =
Theory.Target.regs tgt |> Set.map ~f:Var.reify (module Var)
in
let pc = Var.create ~is_virtual:false ~fresh:false "PC" word_ty in
Var.Set.add bap_regs pc
let gpr (tgt : Theory.target) (lang : Theory.language) : Var.Set.t KB.t =
let+ thumb = is_thumb lang in
let roles = [Theory.Role.Register.general] in
let roles =
if thumb
then Theory.Role.read ~package:"arm" "thumb"::roles
else roles
in
let exclude = Theory.Role.Register.[
stack_pointer;
frame_pointer;
] in
let maybe_reify v =
let v = Var.reify v in
let name = Var.name v in
if String.(is_prefix name ~prefix:"R") &&
not (thumb && String.(equal name "R7"))
then Some v
else None
in
Theory.Target.regs ~exclude:exclude ~roles:roles tgt |>
Set.filter_map ~f:(maybe_reify) (module Var)
(* Assuming the var is in linear SSA form, we undo this form,
and then check if it's a register name. *)
let reg_name (v : var) : string option =
let name = Var.name v in
let name = try Linear_ssa.orig_name name with _ -> name in
Substituter.unmark_reg_name name
let is_stack_pointer (v : var) : bool = match reg_name v with
| Some "SP" -> true
| _ -> false
let preassign_var (pre : var option) (v : var)
~(is_thumb : bool) : var option =
let typ = Var.typ v in
match reg_name v with
| None -> pre
| Some "FP" ->
(* We assign R11 as the pre-assigned FP register on ARM, and R7
for Thumb, keeping in line with the ABI (as far as i can
tell). *)
if is_thumb
then Some (Var.create "R7" typ)
else Some (Var.create "R11" typ)
| Some name -> Some (Var.create name typ)
let preassign
(_tgt : Theory.target)
(ir : Ir.t)
~(is_thumb : bool) : Ir.t =
let pre = preassign_var ~is_thumb in
Ir.map_op_vars ir ~f:(fun v -> {
v with pre_assign = List.hd_exn v.temps |> pre v.pre_assign})
(* Appends the effects of s2 to those of s1, respecting the order if they
are not in seperate blocks. *)
let (@.) (s1 : arm_eff) (s2 : arm_eff) : arm_eff =
let { current_data = data1; current_ctrl = ctrl1; other_blks = blks1} = s1 in
let { current_data = data2; current_ctrl = ctrl2; other_blks = blks2} = s2 in
{
current_data = data1 @ data2;
current_ctrl = ctrl1 @ ctrl2;
other_blks = Ir.union blks1 blks2
}
module ARM_ops = struct
(* Condition codes used in ARM. *)
module Cond = struct
type t = EQ | NE | LE | GT | LT | GE | HI | LO | HS | LS
let to_string : t -> string = function
| EQ -> "eq"
| NE -> "ne"
| LE -> "le"
| GT -> "gt"
| LT -> "lt"
| GE -> "ge"
| HI -> "hi"
| LO -> "lo"
| HS -> "hs"
| LS -> "ls"
let opposite : t -> t = function
| EQ -> NE
| NE -> EQ
| LE -> GT
| GT -> LE
| LT -> GE
| GE -> LT
| HI -> LS
| LO -> HS
| HS -> LO
| LS -> HI
end
module Ops = struct
include Ir.Opcode
let op ?(cnd : Cond.t option = None) s =
let s = s ^ Option.value_map cnd ~default:"" ~f:Cond.to_string in
create ~arch:"arm" s
(* With Thumb, there are cases where we need to set the flags to get the
narrow encoding of the instruction (and other cases where this is the
opposite). *)
let movcc cnd = op "mov" ~cnd
let mov set_flags = op (if set_flags then "movs" else "mov")
let movw = op "movw"
let add set_flags = op (if set_flags then "adds" else "add")
let addw = op "addw"
let mul = op "mul"
let sub set_flags = op (if set_flags then "subs" else "sub")
let neg = op "neg"
let mvn = op "mvn"
let lsl_ = op "lsl"
let lsr_ = op "lsr"
let asr_ = op "asr"
let and_ = op "and"
let orr = op "orr"
let eor = op "eor"
let ldr = op "ldr"
let ldrh = op "ldrh"
let ldrb = op "ldrb"
let str = op "str"
let cmp = op "cmp"
let sdiv = op "sdiv"
let udiv = op "udiv"
let b ?(cnd = None) () = op "b" ~cnd
let bl ?(cnd = None) () = op "bl" ~cnd
(* Workaround for issue with minizinc not correctly handling spaces. *)
let ite cnd = op ("ite_" ^ Cond.to_string cnd)
let it cnd = op ("it_" ^ Cond.to_string cnd)
end
let create_temp ty =
Var.create ~is_virtual:true ~fresh:true "tmp" ty |>
Ir.simple_var
(* Helper data structure for generating conditional branches. *)
module Branch = struct
(* `generate` accepts the condition and returns the corresponding
branch instruction, along with the fake destination operand.
`is_call` denotes whether this is a conditional call or not.
We bookkeep this information because on Thumb a conditional
call needs to be placed inside of an IT block.
*)
type t = {
generate : Cond.t -> Ir.operation * Ir.operand;
is_call : bool;
}
let create (dst : Ir.operand) ~(is_call : bool) : t = {
generate = (fun cnd ->
let cnd = Some cnd in
let opcode =
if is_call then Ops.(bl () ~cnd) else Ops.(b () ~cnd) in
let tmp = Ir.Void (create_temp bit_ty) in
let op = Ir.simple_op opcode tmp [dst] in
op, tmp);
is_call;
}
end
(* defaults to data instructions, since they are way more common *)
let instr i sem =
{sem with current_data = i::sem.current_data}
(* Create a control statement. *)
let control j sem =
{sem with current_ctrl = j::sem.current_ctrl}
(* Check if the constant is greater than 255, in which case we want
movw rather than mov. If it's greater than 65535, then we will use
the ldr pseudo instruction. The assembler will store the constant
in a literal pool at the end of our patch, and it will access this
constant with a PC-relative load. *)
let mov_const (c : word) ~(is_thumb : bool) : Ir.opcode =
match Word.to_int_exn c with
| n when n <= 0xFF -> Ops.mov is_thumb
| n when n <= 0xFFFF -> Ops.movw
| _ -> Ops.ldr
(* Some cowboy type checking here, to check which kind of mov to
use. Currently doesn't work if variables are instantiated
with spilled registers! Can be fixed by having seperate Variable
constructors for spilled and non-spilled registers. *)
let arm_mov (arg1 : Ir.operand) (arg2 : arm_pure)
~(is_thumb : bool) : arm_eff KB.t =
let {op_val = arg2_var; op_eff = arg2_sem} = arg2 in
match arg1, arg2_var with
| Ir.Var _, Ir.Var _
when not @@ List.is_empty arg2_sem.current_data -> begin
(* FIXME: absolute hack! if we have vars here, we can assume
that the last operation assigned to a temporary, and we can
just replace that temporary with the known destination, and
return that as the effect. *)
match arg2_sem.current_data with
| [] -> assert false (* excluded by the guard above *)
| op :: ops ->
let op = {op with Ir.lhs = [arg1]} in
KB.return {arg2_sem with current_data = op :: ops}
end
| Ir.Var _, Ir.Var _ ->
let mov = Ir.simple_op Ops.(mov is_thumb) arg1 [arg2_var] in
KB.return @@ instr mov arg2_sem
| Ir.Var _, Ir.Const w ->
let mov = Ir.simple_op (mov_const w ~is_thumb) arg1 [arg2_var] in
KB.return @@ instr mov arg2_sem
| Ir.Void _, Ir.Void _
when not @@ List.is_empty arg2_sem.current_data -> begin
(* Same hack as above, but with void operands. *)
match arg2_sem.current_data with
| [] -> assert false
| op :: ops ->
let op = {op with Ir.lhs = [arg1]} in
KB.return {arg2_sem with current_data = op :: ops}
end
| _ ->
Err.(fail @@ Other (
sprintf "arm_mov: unexpected arguments (%s, %s)"
(Ir.pretty_operand arg1)
(Ir.pretty_operand arg2_var)))
let var v = {op_val = Ir.Var (Ir.simple_var v); op_eff = empty_eff}
let mem v = {op_val = Ir.Void (Ir.simple_var v); op_eff = empty_eff}
let const c = {op_val = Ir.Const c; op_eff = empty_eff}
let uop o ty arg ~is_thumb =
let res = create_temp ty in
let {op_val = arg_val; op_eff = arg_sem} = arg in
match arg_val with
| Ir.Const w ->
let tmp = Ir.Var (create_temp word_ty) in
let ldr = Ir.simple_op (mov_const w ~is_thumb) tmp [arg_val] in
let op = Ir.simple_op o (Ir.Var res) [tmp] in
let sem = {arg_sem with current_data = op::ldr::arg_sem.current_data} in
{op_val = Ir.Var res; op_eff = sem}
| _ ->
let op = Ir.simple_op o (Ir.Var res) [arg_val] in
let sem = {arg_sem with current_data = op::arg_sem.current_data} in
{op_val = Ir.Var res; op_eff = sem}
let binop o ty arg1 arg2 ~is_thumb =
let res = create_temp ty in
let {op_val = arg1_val; op_eff = arg1_sem} = arg1 in
let {op_val = arg2_val; op_eff = arg2_sem} = arg2 in
match arg2_val with
| Ir.Const w when Word.to_int_exn w > 0xFFF ->
(* For binops that allow constant operands, the limit seems
to be 12 bits according to the manual. *)
let tmp = Ir.Var (create_temp word_ty) in
let ldr = Ir.simple_op (mov_const w ~is_thumb) tmp [arg2_val] in
let op = Ir.simple_op o (Ir.Var res) [arg1_val; tmp] in
let sem = arg1_sem @. arg2_sem in
let sem = {sem with current_data = op::ldr::sem.current_data} in
{op_val = Ir.Var res; op_eff = sem}
| _ ->
let op = Ir.simple_op o (Ir.Var res) [arg1_val; arg2_val] in
let sem = arg1_sem @. arg2_sem in
let sem = {sem with current_data = op::sem.current_data} in
{op_val = Ir.Var res; op_eff = sem}
let ternop o ty arg1 arg2 arg3 =
let res = create_temp ty in
let {op_val = arg1_val; op_eff = arg1_sem} = arg1 in
let {op_val = arg2_val; op_eff = arg2_sem} = arg2 in
let {op_val = arg3_val; op_eff = arg3_sem} = arg3 in
let op = Ir.simple_op o (Ir.Var res) [arg1_val; arg2_val; arg3_val] in
let sem = arg1_sem @. arg2_sem @. arg3_sem in
let sem = {sem with current_data = op::sem.current_data} in
{op_val = Ir.Var res; op_eff = sem}
let add (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
let {op_val; _} = arg2 in
let fits_in_3 w =
let open Word in
let width = bitwidth w in
of_int ~width 0 <= w && w <= of_int ~width 7
in
match op_val with
| Const w when not (fits_in_3 w) && Word.to_int_exn w <= 0xFFF ->
(* addw can accept up to 12 bits for the immediate. *)
KB.return @@ binop Ops.addw word_ty arg1 arg2 ~is_thumb
| _ ->
KB.return @@ binop Ops.(add is_thumb) word_ty arg1 arg2 ~is_thumb
let neg (arg : arm_pure) ~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ uop Ops.neg word_ty arg ~is_thumb
let lognot (arg : arm_pure) ~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ uop Ops.mvn word_ty arg ~is_thumb
let mul (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.mul word_ty arg1 arg2 ~is_thumb
let sub (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.(sub is_thumb) word_ty arg1 arg2 ~is_thumb
let sdiv (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.sdiv word_ty arg1 arg2 ~is_thumb
let udiv (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.udiv word_ty arg1 arg2 ~is_thumb
let lsl_ (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.lsl_ word_ty arg1 arg2 ~is_thumb
let lsr_ (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.lsr_ word_ty arg1 arg2 ~is_thumb
let asr_ (arg1 : arm_pure) (arg2 : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.asr_ word_ty arg1 arg2 ~is_thumb
let ldr_op (bits : int) : Ir.opcode KB.t =
if bits = 32 then KB.return Ops.ldr
else if bits = 16 then KB.return Ops.ldrh
else if bits = 8 then KB.return Ops.ldrb
else Err.(fail @@ Other (
sprintf "Arm_selector.ldr: Loading a bit-width that is not \
8, 16 or 32 (got %d)!" bits))
let ldr (bits : int) (mem : arm_pure) (loc : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
let+ ldr = ldr_op bits in
(* Update the semantics of loc with those of mem *)
binop ldr word_ty mem loc ~is_thumb
let str (mem : arm_pure) (value : arm_pure) (loc : arm_pure)
~(is_thumb : bool) : arm_pure KB.t =
let res = create_temp mem_ty in
let lhs = Ir.Void (create_temp mem_ty) in
let {op_val = loc_val; op_eff = loc_sem} = loc in
let {op_val = value_val; op_eff = value_sem} = value in
let {op_val = mem_val; op_eff = mem_sem} = mem in
let+ ops = match value_val with
| Var _ ->
let op = Ir.simple_op Ops.str lhs [mem_val; value_val; loc_val] in
KB.return [op]
| Const w ->
let tmp = Ir.Var (create_temp word_ty) in
let mov = Ir.simple_op (mov_const w ~is_thumb) tmp [value_val] in
let op = Ir.simple_op Ops.str lhs [mem_val; tmp; loc_val] in
KB.return [op; mov]
| _ ->
Err.(fail @@ Other (
sprintf "str: unsupported `value` operand %s" @@
Ir.pretty_operand value_val)) in
let sem = loc_sem @. value_sem @. mem_sem in
let sem = {sem with current_data = ops @ sem.current_data} in
{op_val = Void res; op_eff = sem}
(* Special case of the `str` instruction where the address that we are
storing to is of the shape `base + off` (e.g. `str R0, [R1, #8]`). *)
let str_base_off (mem : arm_pure) (value : arm_pure) (base : var) (off : word)
~(is_thumb : bool) : arm_pure KB.t =
let res = create_temp mem_ty in
let lhs = Ir.Void (create_temp mem_ty) in
let {op_val = value_val; op_eff = value_sem} = value in
let {op_val = mem_val; op_eff = mem_sem} = mem in
let base = Ir.Var (Ir.simple_var base) in
let off = Ir.Const off in
let+ ops = match value_val with
| Var _ ->
let op = Ir.simple_op Ops.str lhs [mem_val; value_val; base; off] in
KB.return [op]
| Const w ->
let tmp = Ir.Var (create_temp word_ty) in
let mov = Ir.simple_op (mov_const w ~is_thumb) tmp [value_val] in
let op = Ir.simple_op Ops.str lhs [mem_val; tmp; base; off] in
KB.return [op; mov]
| _ ->
Err.(fail @@ Other (
sprintf "str_base_off: unsupported `value` operand %s" @@
Ir.pretty_operand value_val)) in
let sem = value_sem @. mem_sem in
let sem = {sem with current_data = ops @ sem.current_data} in
{op_val = Void res; op_eff = sem}
let logand (a : arm_pure) (b : arm_pure) ~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.and_ word_ty a b ~is_thumb
let logor (a : arm_pure) (b : arm_pure) ~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.orr word_ty a b ~is_thumb
let xor (a : arm_pure) (b : arm_pure) ~(is_thumb : bool) : arm_pure KB.t =
KB.return @@ binop Ops.eor word_ty a b ~is_thumb
(* Specialization of binops for generating comparisons. *)
let binop_cmp
(cond : Cond.t)
(arg1 : arm_pure)
(arg2 : arm_pure)
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : arm_pure KB.t =
let tmp_flag = Ir.Void (create_temp bit_ty) in
let {op_val = arg1_val; op_eff = arg1_sem} = arg1 in
let {op_val = arg2_val; op_eff = arg2_sem} = arg2 in
let cmp = Ir.simple_op Ops.cmp tmp_flag [arg1_val; arg2_val] in
let sem = arg1_sem @. arg2_sem in
match branch with
| Some {generate; is_call} ->
(* The comparison is used by a branch instruction, so use the
`generate` function to add it to the ctrl semantics. It should
be the only one in the current block. *)
let* () = match sem.current_ctrl with
| [] -> KB.return ()
| _ -> Err.fail @@ Other
"Arm_selector.binop_cmp: encountered a branch with non-empty \
ctrl semantics" in
let current_data =
if is_thumb && is_call then
(* Thumb requires an `it` block for conditional calls. *)
let tmp_it = Ir.Void (create_temp bit_ty) in
let it = Ir.simple_op Ops.(it cond) tmp_it [tmp_flag] in
it :: cmp :: sem.current_data
else cmp :: sem.current_data in
let br, op_val = generate cond in
let sem = {sem with current_data; current_ctrl = [br]} in
(* Keep in mind, `op_val` should be discarded. *)
KB.return @@ {op_val; op_eff = sem}
| None ->
(* Store the result of the comparison in an intermediate destination. *)
let tmp_cmp, ite =
if is_thumb then
let tmp_ite = Ir.Void (create_temp bit_ty) in
tmp_ite, [Ir.simple_op Ops.(ite cond) tmp_ite [tmp_flag]]
else tmp_flag, [] in
let tmp1 = create_temp word_ty in
let tmp2 = create_temp word_ty in
(* These temps need to be unique, but VIBES IR also needs to know
that they are congruent (i.e. they must map to the same register). *)
let* () = match patch with
| None -> KB.return ()
| Some patch ->
Data.Patch.add_congruence patch
(List.hd_exn tmp1.temps, List.hd_exn tmp2.temps) in
let then_ = Ops.movcc @@ Some cond in
let else_ = Ops.movcc @@ Some (Cond.opposite cond) in
let then_ = Ir.simple_op then_ (Var tmp1) [Const Word.(one 32); tmp_cmp] in
let else_ = Ir.simple_op else_ (Var tmp2) [Const Word.(zero 32); tmp_cmp] in
let ops = (else_ :: then_ :: ite) @ [cmp] in
let sem = {sem with current_data = ops @ sem.current_data} in
KB.return @@ {op_val = Var tmp1; op_eff = sem}
let equals
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : arm_pure -> arm_pure -> arm_pure KB.t =
binop_cmp EQ ~patch ~is_thumb ~branch
let not_equals
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : arm_pure -> arm_pure -> arm_pure KB.t =
binop_cmp NE ~patch ~is_thumb ~branch
let less_than
(arg1 : arm_pure)
(arg2 : arm_pure)
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : arm_pure KB.t =
match arg1.op_val with
| Ir.Const _ -> binop_cmp HI arg2 arg1 ~patch ~is_thumb ~branch
| _ -> binop_cmp LO arg1 arg2 ~patch ~is_thumb ~branch
let less_or_equal
(arg1 : arm_pure)
(arg2 : arm_pure)
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : arm_pure KB.t =
match arg1.op_val with
| Ir.Const _ -> binop_cmp HS arg2 arg1 ~patch ~is_thumb ~branch
| _ -> binop_cmp LS arg1 arg2 ~patch ~is_thumb ~branch
let signed_less_than
(arg1 : arm_pure)
(arg2 : arm_pure)
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : arm_pure KB.t =
match arg1.op_val with
| Ir.Const _ -> binop_cmp GT arg2 arg1 ~patch ~is_thumb ~branch
| _ -> binop_cmp LT arg1 arg2 ~patch ~is_thumb ~branch
let signed_less_or_equal
(arg1 : arm_pure)
(arg2 : arm_pure)
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : arm_pure KB.t =
match arg1.op_val with
| Ir.Const _ -> binop_cmp GE arg2 arg1 ~patch ~is_thumb ~branch
| _ -> binop_cmp LE arg1 arg2 ~patch ~is_thumb ~branch
(* Unconditional jump *)
let goto ?(is_call : bool = false) (tgt : Ir.operand)
(call_params : Ir.operand list) : arm_eff =
let opcode = if is_call then Ops.bl () else Ops.b () in
(* XXX: should we figure out how to better describe the effects of the call?
Ideally, we should say that the call will clobber the memory and all
caller-save registers. We could probably do this at the BIR level, at
the return successor of each call site, where we make each effect
explicit. *)
let tmp_branch = create_temp bit_ty in
let params = [tgt] @ call_params in
control (Ir.simple_op opcode (Void tmp_branch) params) empty_eff
end
(* We assume that a block is always created *)
let ir_of_arm_eff (t : arm_eff) : Ir.t KB.t =
if not (List.is_empty t.current_data && List.is_empty t.current_ctrl)
then Err.(fail @@ Other "Arm_selector.ir: expected empty data and ctrl")
else KB.return @@ Ir.add_in_vars t.other_blks
module ARM_Gen =
struct
open ARM_ops
let sel_binop
(o : binop)
~(patch : Data.Patch.t option)
~(is_thumb : bool)
~(branch : Branch.t option) : (arm_pure -> arm_pure -> arm_pure KB.t) KB.t =
match o with
| PLUS -> KB.return @@ add ~is_thumb
| MINUS -> KB.return @@ sub ~is_thumb
| TIMES -> KB.return @@ mul ~is_thumb
| DIVIDE -> KB.return @@ udiv ~is_thumb
| SDIVIDE -> KB.return @@ sdiv ~is_thumb
| LSHIFT -> KB.return @@ lsl_ ~is_thumb
| RSHIFT -> KB.return @@ lsr_ ~is_thumb
| ARSHIFT -> KB.return @@ asr_ ~is_thumb
| AND -> KB.return @@ logand ~is_thumb
| OR -> KB.return @@ logor ~is_thumb
| EQ -> KB.return @@ equals ~patch ~is_thumb ~branch
| NEQ -> KB.return @@ not_equals ~patch ~is_thumb ~branch
| LT -> KB.return @@ less_than ~patch ~is_thumb ~branch
| LE -> KB.return @@ less_or_equal ~patch ~is_thumb ~branch
| SLT -> KB.return @@ signed_less_than ~patch ~is_thumb ~branch
| SLE -> KB.return @@ signed_less_or_equal ~patch ~is_thumb ~branch
| XOR -> KB.return @@ xor ~is_thumb
| MOD | SMOD -> Err.(fail @@ Other (
Format.sprintf "sel_binop: unsupported operation %s"
(Bil.string_of_binop o)))
let get_const (v : 'a Theory.Bitv.t Theory.value) : word option =
let bil = KB.Value.get Exp.slot v in
match bil with
| Int w -> Some w
| _ -> None
let is_call (jmp : jmp term) : bool =
match Jmp.kind jmp with
| Call _ -> true
| _ -> false
let get_label (tid : tid) : Ir.operand KB.t =
let+ addr = KB.collect Theory.Label.addr tid in
match addr with
| None -> Ir.Label tid
| Some addr -> Ir.Offset (Bitvec.to_int addr |> Word.of_int ~width:32)
let get_dst (jmp : jmp term) : Ir.operand option KB.t =
let open KB.Syntax in
match Jmp.dst jmp, Jmp.alt jmp with
| Some dst, None ->
begin
match Jmp.resolve dst with
| First dst -> get_label dst >>| Option.return
| Second c -> KB.return @@ Option.map
~f:(fun w -> Ir.Offset w) (get_const c)
end
| Some _, Some dst ->
begin
match Jmp.resolve dst with
| First dst -> get_label dst >>| Option.return
| Second c -> KB.return @@ Option.map
~f:(fun w -> Ir.Offset w) (get_const c)
end
| _ -> KB.return @@ None
let sel_unop (o : unop)
~(is_thumb : bool) : (arm_pure -> arm_pure KB.t) KB.t =
match o with
| NOT -> KB.return @@ lognot ~is_thumb
| NEG -> KB.return @@ neg ~is_thumb
(* `lhs` is the left-hand side of the Def term that we are selecting from
(if any). The selector can make more informed decisions with this info.
Note that this only applies to the top-level expression, not intermediate
operations generated from subexpressions.
`branch` also applies to top-level expressions. We're going to use this
when handling the condition for a branch instruction. This lets us
generate better code and not have to write a specialized version of this
function.
NOTE: these parameters generally are NOT to be passed to subexpressions
(e.g. recursive calls to this function). These calls are meant to handle
intermediate operations (which would store their results in a temporary
variable). If these arguments are applied to those recursive calls, then
things could go very wrong.
*)
let rec select_exp
?(branch : Branch.t option = None)
?(lhs : var option = None)
(e : Bil.exp)
~(patch : Data.Patch.t option)
~(is_thumb : bool) : arm_pure KB.t =
let exp = select_exp ~patch ~is_thumb ~branch:None ~lhs:None in
let exp_binop_integer = select_exp_binop_integer ~patch ~is_thumb in
match e with
| Load (mem, BinOp (PLUS, a, Int w), _, size) ->
let* mem = exp mem in
let* ldr = ldr_op @@ Size.in_bits size in
let+ a = exp a in
let w = const w in
ternop ldr word_ty mem a w
| Load (mem, BinOp (MINUS, a, Int w), _, size) ->
let* mem = exp mem in
let* ldr = ldr_op @@ Size.in_bits size in
let+ a = exp a in
let w = const (Word.neg w) in
ternop ldr word_ty mem a w
| Load (mem, Int addr, _, size) ->
let* mem = exp mem in
let tmp = Ir.Var (create_temp word_ty) in
let op = Ir.simple_op (mov_const addr ~is_thumb) tmp Ir.[Const addr] in
let a = {op_val = tmp; op_eff = instr op empty_eff} in
let+ ldr = ldr_op @@ Size.in_bits size in
ternop ldr word_ty mem a @@ const (Word.zero 32)
| Load (mem, loc, _, size) ->
let* mem = exp mem in
let* loc = exp loc in
ldr (Size.in_bits size) mem loc ~is_thumb
| Store (mem, BinOp (PLUS, Var a, Int w), value, _ , _size) ->
let* mem = exp mem in
let* value = exp value in
str_base_off mem value a w ~is_thumb
| Store (mem, BinOp (MINUS, Var a, Int w), value, _ , _size) ->
let* mem = exp mem in
let* value = exp value in
str_base_off mem value a Word.(-w) ~is_thumb
| Store (mem, Int addr, value, _, _size) ->
let* mem = exp mem in
let tmp = Ir.Var (create_temp word_ty) in
let op = Ir.simple_op (mov_const addr ~is_thumb) tmp Ir.[Const addr] in
let loc = {op_val = tmp; op_eff = instr op empty_eff} in
let* value = exp value in
str mem value loc ~is_thumb
| Store (mem, loc, value, _, _size) ->
let* mem = exp mem in
let* loc = exp loc in
let* value = exp value in
(* We have to swap the arguments here, since ARM likes the value
first, and the location second *)
str mem value loc ~is_thumb
(* This should've been handled by the optimizer, but we will check anyway. *)
| BinOp (PLUS, Int w, a) when Word.(w = zero 32) -> exp a
| BinOp ((PLUS | MINUS), a, Int w) when Word.(w = zero 32) -> exp a
(* FIXME: this is amost certainly wrong *)
| BinOp (PLUS, a, b) when Exp.(a = b) ->
let* a = exp a in
lsl_ a ~is_thumb @@ const @@ Word.one 32
(* Thumb 2 encoding allows adding an 8-bit immediate, when
source and destination registers are the same. *)
| BinOp ((PLUS | MINUS) as o, Var a, Int w)
| BinOp ((PLUS | MINUS) as o, Int w, Var a)
when Option.exists lhs ~f:(Linear_ssa.same a)
&& is_thumb && Word.to_int_exn w <= 0xFF ->
let set_flags = not @@ is_stack_pointer a in
let op = if Caml.(o = PLUS) then Ops.add else Ops.sub in
KB.return @@ binop (op set_flags) word_ty (var a) (const w) ~is_thumb
(* Move the immediate operand to a temporary. *)
| BinOp (TIMES as o, Int w, x) | BinOp (TIMES as o, x, Int w) ->
let i = Word.to_int_exn w in
(* Power of two can be simplified to a left shift. *)
if Int.is_pow2 i then
let zero = const (Word.zero 32) in
let sh = Int.ctz i in
(* Greater than 31 is not encodable, but that also just means we're
shifting out every bit, so the result is zero. *)
if sh > 31 then KB.return zero
else
let* x = exp x in
lsl_ x ~is_thumb @@ const @@ Word.of_int sh ~width:32
else exp_binop_integer o w x
(* Immediate shift value must be within the range 1-32. *)
| BinOp (ARSHIFT as o, x, Int w)
when Word.(w < one 32 || w > of_int ~width:32 32) ->
exp_binop_integer o w x ~swap:true
(* Immediate shift value must be within the range 0-31. *)
| BinOp ((LSHIFT | RSHIFT), _, Int w)
when Word.(w > of_int ~width:32 31) -> KB.return @@ const @@ Word.zero 32
(* Move the immediate operand to a temporary. *)
| BinOp ((LSHIFT | RSHIFT | ARSHIFT) as o, Int w, x)
| BinOp ((OR | AND | XOR) as o, Int w, x)
| BinOp ((OR | AND | XOR) as o, x, Int w) -> exp_binop_integer o w x
| BinOp (o, a, b) ->
let* a = exp a in
let* b = exp b in
let* o = sel_binop o ~patch ~is_thumb ~branch in
o a b
| UnOp (o, a) -> begin
match o, Type.infer a with
| _ , Error e ->
Err.fail @@ Other
(sprintf "Arm_selector.select_exp: Type.infer failed: %s"
(Type.Error.to_string e))
| NOT, Ok (Imm 1) ->
(* Lazy way to compute the negation of that boolean. *)
let identity = Bil.(BinOp (EQ, a, Int (Word.zero 32))) in
select_exp identity ~patch ~branch ~is_thumb ~lhs:None
| _ ->
let* a = exp a in
let* o = sel_unop o ~is_thumb in
o a
end
| Var v -> begin
match Var.typ v with
| Imm 1 when Option.is_some branch ->
(* Lazy way to compute the boolean. *)
let v = var v in
let c = const @@ Word.zero 32 in
let* o = sel_binop NEQ ~patch ~is_thumb ~branch in
o v c
| Imm _ -> KB.return @@ var v
| Mem _ -> KB.return @@ mem v
| Unk -> Err.(fail @@ Other (
Format.asprintf "select_exp: encountered variable %a of \
unknown type" Var.pp v))
end
| Int w -> KB.return @@ const w
| Cast (_, _, _) -> Err.(fail @@ Other "select_exp: Cast is unsupported!")
| Let (_, _, _) -> Err.(fail @@ Other "select_exp: Let is unsupported!")
| Unknown (_, _) -> Err.(fail @@ Other "select_exp: Unknown is unsupported!")
| Ite (_, _, _) -> Err.(fail @@ Other "select_exp: Ite is unsupported!")
| Extract (_, _, _) -> Err.(fail @@ Other "select_exp: Extract is unsupported!")
| Concat (_, _) -> Err.(fail @@ Other "select_exp: Concat is unsupported!")
(* Helper function for selecting instructions that correspond to binops, but
whose operands must all be registers. Therefore, if one of the operands
is an immediate value, it must be loaded into a register first using an
intermediate operation.
`swap` will swap the order of the operands `lhs` and `rhs`.
*)
and select_exp_binop_integer
?(swap : bool = false)
(o : binop)
(lhs : word)
(rhs : exp)
~(patch : Data.Patch.t option)
~(is_thumb : bool) : arm_pure KB.t =
let* rhs = select_exp rhs ~patch ~is_thumb in
let* o = sel_binop o ~patch ~is_thumb ~branch:None in
let tmp = Ir.Var (create_temp word_ty) in
let op = Ir.simple_op Ops.(mov is_thumb) tmp [Const lhs] in
let lhs = {op_val = tmp; op_eff = instr op empty_eff} in
if swap then o rhs lhs else o lhs rhs
and select_stmt
(call_params : Ir.operand list)
(s : Blk.elt)
~(patch : Data.Patch.t option)
~(is_thumb : bool) : arm_eff KB.t =
let exp = select_exp ~patch ~is_thumb in
match s with
| `Def t -> begin
let lhs = Def.lhs t in
let rhs = Def.rhs t in
let mov = arm_mov ~is_thumb in
match Var.typ lhs with
| Imm _ | Unk ->
let* rhs = exp rhs ~lhs:(Some lhs) in
let lhs = Ir.Var (Ir.simple_var lhs) in
mov lhs rhs
| Mem _ ->
let lhs_mem = Ir.Void (Ir.simple_var lhs) in
(* We don't need to pass the lhs for mem assign, since
none of the patterns we match against will apply here. *)
let* rhs = exp rhs in
mov lhs_mem rhs
end
| `Jmp jmp ->
let open KB.Syntax in
let cond = Jmp.cond jmp in
let is_call = is_call jmp in
get_dst jmp >>= begin function
| None -> Err.(fail @@ Other (
Format.asprintf "Unexpected branch: %a" Jmp.pp jmp))
(* NOTE: branches if cond is zero *)
| Some dst -> match cond with
| Int w when Word.(w <> b0) ->
KB.return @@ goto dst call_params ~is_call
| _ ->
let+ {op_eff = eff; _} =
exp cond ~branch:(Some (Branch.create dst ~is_call)) in