-
Notifications
You must be signed in to change notification settings - Fork 3
/
cc_x86.M1
4969 lines (4119 loc) · 202 KB
/
cc_x86.M1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# SPDX-FileCopyrightText: © 2017 Jeremiah Orians
#
# SPDX-License-Identifier: GPL-3.0-or-later
# Opcode table: each DEFINE gives M1 the hex bytes to emit for one mnemonic token.
# Names ending in "," are followed at the use site by an immediate operand,
# names containing [DWORD] by a 32-bit address, and names containing +BYTE by an 8-bit displacement.
# call, int and the jumps likewise take their operand at the use site.
# Addition and bitwise AND
DEFINE add_eax, 83C0
DEFINE add_ebx, 83C3
DEFINE add_ecx, 83C1
DEFINE add_edx, 83C2
DEFINE add_esi, 83C6
DEFINE add_eax,ebx 01D8
DEFINE add_eax,ecx 01C8
DEFINE add_ebx,eax 01C3
DEFINE add_ecx,edi 01F9
DEFINE and_eax,ebx 21D8
DEFINE and_eax, 25
# Calls
DEFINE call_eax FFD0
DEFINE call E8
# Comparisons
DEFINE cmp_eax, 83F8
DEFINE cmp_ebp, 83FD
DEFINE cmp_ebx, 83FB
DEFINE cmp_ecx, 83F9
DEFINE cmp_edx, 83FA
DEFINE cmp_esi, 83FE
DEFINE cmp_eax,ebx 39D8
DEFINE cmp_eax,ecx 39C8
DEFINE cmp_ebx,ecx 39CB
DEFINE cmp_ebx,edx 39D3
DEFINE cmp_esi,edi 39FE
# Division and multiplication
DEFINE idiv_ebx F7FB
DEFINE imul_eax, 6BC0
DEFINE imul_ebp, 6BED
DEFINE imul_eax,ebx 0FAFC3
# Software interrupt and branches
DEFINE int CD
DEFINE jbe8 76
DEFINE je 0F84
DEFINE jg 0F8F
DEFINE jl 0F8C
DEFINE jle 0F8E
DEFINE jmp E9
DEFINE jne 0F85
# Address computation
DEFINE lea_ecx,[esp] 8D0C24
# Register loads from an immediate
DEFINE mov_eax, B8
DEFINE mov_ebx, BB
DEFINE mov_ecx, B9
DEFINE mov_edi, BF
DEFINE mov_edx, BA
DEFINE mov_esi, BE
# Register to register moves (destination is named first)
DEFINE mov_eax,ebp 89E8
DEFINE mov_eax,ebx 89D8
DEFINE mov_eax,ecx 89C8
DEFINE mov_eax,edx 89D0
DEFINE mov_eax,esi 89F0
DEFINE mov_ebp,eax 89C5
DEFINE mov_ebp,edx 89D5
DEFINE mov_ebx,eax 89C3
DEFINE mov_ebx,ecx 89CB
DEFINE mov_ebx,edx 89D3
DEFINE mov_ecx,eax 89C1
DEFINE mov_ecx,ebx 89D9
DEFINE mov_edi,esi 89F7
DEFINE mov_edx,eax 89C2
DEFINE mov_edx,ebx 89DA
DEFINE mov_esi,eax 89C6
DEFINE mov_esi,edi 89FE
# Loads from memory
DEFINE mov_eax,[DWORD] A1
DEFINE mov_ebx,[DWORD] 8B1D
DEFINE mov_ecx,[DWORD] 8B0D
DEFINE mov_al,[eax] 8A00
DEFINE mov_al,[ebx] 8A03
DEFINE mov_al,[ecx] 8A01
DEFINE mov_al,[edx] 8A02
DEFINE mov_bl,[ebx] 8A1B
DEFINE mov_bl,[ecx] 8A19
DEFINE mov_bl,[edx] 8A1A
DEFINE mov_cl,[ebx] 8A0B
DEFINE mov_cl,[ebx+BYTE] 8A4B
DEFINE mov_eax,[eax] 8B00
DEFINE mov_eax,[ebx] 8B03
DEFINE mov_eax,[eax+BYTE] 8B40
DEFINE mov_eax,[ebp+BYTE] 8B45
DEFINE mov_eax,[ebx+BYTE] 8B43
DEFINE mov_eax,[ecx+BYTE] 8B41
DEFINE mov_eax,[edx+BYTE] 8B42
DEFINE mov_ebp,[ebp] 8B6D00
DEFINE mov_ebx,[eax+BYTE] 8B58
DEFINE mov_ebx,[ebx] 8B1B
DEFINE mov_ebx,[ebx+BYTE] 8B5B
DEFINE mov_ebx,[ecx+BYTE] 8B59
DEFINE mov_ecx,[eax+BYTE] 8B48
DEFINE mov_ecx,[ebx] 8B0B
DEFINE mov_ecx,[ecx] 8B09
DEFINE mov_ecx,[ecx+BYTE] 8B49
DEFINE mov_ecx,[edx+BYTE] 8B4A
DEFINE mov_edi,[edx+BYTE] 8B7A
DEFINE mov_edx,[edx+BYTE] 8B52
# Stores to memory
DEFINE mov_[DWORD],eax A3
DEFINE mov_[DWORD],ebx 891D
DEFINE mov_[DWORD],ecx 890D
DEFINE mov_[DWORD],edx 8915
DEFINE mov_[ebx],al 8803
DEFINE mov_[ecx],al 8801
DEFINE mov_[esi],al 8806
DEFINE mov_[ecx],bl 8819
DEFINE mov_[eax],ebx 8918
DEFINE mov_[eax],ecx 8908
DEFINE mov_[eax+BYTE],ebx 8958
DEFINE mov_[eax+BYTE],ecx 8948
DEFINE mov_[eax+BYTE],edx 8950
DEFINE mov_[ebp+BYTE],eax 8945
DEFINE mov_[ebp+BYTE],edx 8955
DEFINE mov_[ebp+BYTE],esi 8975
DEFINE mov_[ebx],eax 8903
DEFINE mov_[ecx+BYTE],eax 8941
DEFINE mov_[edx],eax 8902
DEFINE mov_[edx+BYTE],eax 8942
DEFINE mov_[edx+BYTE],ebp 896A
DEFINE mov_[edx+BYTE],ebx 895A
DEFINE mov_[edx+BYTE],ecx 894A
DEFINE mov_[edx+BYTE],esi 8972
# Zero extension of the low byte into the full register
DEFINE movzx_eax,al 0FB6C0
DEFINE movzx_ebx,bl 0FB6DB
DEFINE movzx_ecx,cl 0FB6C9
# Four zero bytes, used as an initial value for data words
DEFINE NULL 00000000
# Stack operations
DEFINE pop_eax 58
DEFINE pop_ebp 5D
DEFINE pop_ebx 5B
DEFINE pop_ecx 59
DEFINE pop_edi 5F
DEFINE pop_edx 5A
DEFINE pop_esi 5E
DEFINE push_eax 50
DEFINE push_ebp 55
DEFINE push_ebx 53
DEFINE push_ecx 51
DEFINE push_edi 57
DEFINE push_edx 52
DEFINE push_esi 56
# Return, shifts, subtraction and exchange
DEFINE ret C3
DEFINE sal_eax, C1E0
DEFINE shr_eax, C1E8
DEFINE shr_ebx, C1EB
DEFINE sub_eax, 83E8
DEFINE sub_ecx, 83E9
DEFINE sub_esi, 83EE
DEFINE xchg_eax,ebx 93
# Register usage:
# EAX => Temps
# Struct TYPE format: (size 28)
# NEXT => 0
# SIZE => 4
# OFFSET => 8
# INDIRECT => 12
# MEMBERS => 16
# TYPE => 20
# NAME => 24
# Struct TOKEN_LIST format: (size 20)
# NEXT => 0
# LOCALS/PREV => 4
# S => 8
# TYPE => 12
# ARGS/DEPTH => 16
# Where the ELF Header is going to hit
# Simply jump to _start
# Program entry point
# Stack on entry: argc, argv[0], argv[1] (input file), argv[2] (output file)
# Opens both files, sets up the heap pointer, tokenizes and compiles the input,
# then writes the collected output lists and exits with status 0
:_start
pop_eax # argc is not needed
pop_ebx # argv[0]: program name, discarded
pop_ebx # argv[1]: name of the input file
mov_eax, %5 # syscall number for open()
mov_ecx, %0 # flags: read only
int !0x80 # open the input
mov_[DWORD],eax &Input_file # remember the input descriptor
pop_ebx # argv[2]: name of the output file
mov_eax, %5 # syscall number for open()
mov_ecx, %577 # flags: O_WRONLY|O_CREAT|O_TRUNC
mov_edx, %384 # mode: read/write for the owner only (600 in octal)
int !0x80 # open the output
cmp_eax, !0 # did we get a usable descriptor?
jg %_start_out # Have real output
mov_eax, %1 # otherwise fall back to stdout
:_start_out
mov_[DWORD],eax &Output_file # remember the output descriptor
mov_ebx, %0 # brk(0) only queries the current break
mov_eax, %45 # syscall number for SYS_BRK
int !0x80 # ask the kernel
mov_[DWORD],eax &MALLOC # the heap starts at the current break
mov_eax, %0 # HEAD = NULL
call %read_all_tokens # tokenize the whole input
call %Reverse_List # tokens were pushed newest first; restore source order
# call %debug_list # Try to figure out what is wrong
mov_[DWORD],eax &global_token # global_token = first token
call %program # compile the token stream
mov_eax, &header_string1 # banner for the core program
call %File_Print # write it
mov_eax,[DWORD] &output_list # generated code
call %recursive_output # write it oldest entry first
# mov_eax, &header_string2 # Our Enable debug
# call %File_Print # Print it
mov_eax, &header_string3 # banner for the globals
call %File_Print # write it
mov_eax,[DWORD] &globals_list # global variable definitions
call %recursive_output # write them
mov_eax, &header_string4 # banner for the strings
call %File_Print # write it
mov_eax,[DWORD] &strings_list # string literals
call %recursive_output # write them
mov_eax, &header_string5 # closing label
call %File_Print # write it
:Done
# program completed Successfully
mov_eax, %1 # syscall number for exit()
mov_ebx, %0 # exit status 0: all is well
int !0x80 # Call it a good day
# Section banners written by _start around the generated lists
# Written before the contents of output_list
:header_string1 "
# Core program
"
# Within this section it is referenced only by the commented-out lines in _start
:header_string2 "
:ELF_data
"
# Written before the contents of globals_list
:header_string3 "
# Program global variables
"
# Written before the contents of strings_list
:header_string4 "
# Program strings
"
# Written last, after strings_list
:header_string5 "
:ELF_end
"
# read_all_tokens function
# In:  EAX = Token_List* to extend (stored in [Token])
# Out: EAX = [Token] once the whole input has been tokenized
# While looping, EAX carries the current input character C
:read_all_tokens
mov_[DWORD],eax &Token # start from the list we were handed
call %fgetc # prime C with the first input byte
jmp %read_all_tokens_loop # test for EOF before tokenizing anything
:read_all_tokens_next
call %get_token # add one token; EAX comes back as the next C
:read_all_tokens_loop
cmp_eax, !-4 # -4 is the EOF marker returned by fgetc
jne %read_all_tokens_next # more input: keep tokenizing
:read_all_tokens_done
mov_eax,[DWORD] &Token # return the finished list
ret
# get_token function
# Receives INT C (the current input character) in EAX
# Allocates a TOKEN_LIST node plus a 256 byte string buffer, reads one token
# into the buffer and pushes the node onto the list held in [Token]
# C and STRING_INDEX are stored in memory, ECX is used for S and EDX is used for CURRENT
# '#' lines, '//' lines and '/* */' blocks are skipped and produce no token
# Hitting EOF before a token starts, or inside a block comment, adds no node
# Returns the next unconsumed C in EAX (-4 on EOF)
:get_token
push_ebx # Protect EBX
push_ecx # Protect ECX
push_edx # Protect EDX
mov_[DWORD],eax &C # Set C
mov_eax, %20 # Malloc CURRENT (size of TOKEN_LIST)
call %malloc # Get Pointer
mov_edx,eax # Set CURRENT
mov_eax, %256 # Malloc the string
call %malloc # Get pointer to S
mov_ecx,eax # Set S
mov_[edx+BYTE],ecx !8 # CURRENT->S = S
:reset
mov_[DWORD],ecx &string_index # Start writing at S[0] again
mov_eax,[DWORD] &C # Using C
call %clear_white_space # Clear WhiteSpace
mov_[DWORD],eax &C # Set C
cmp_eax, !-4 # Check for EOF
je %get_token_abort # if EOF abort
cmp_eax, !35 # Check for '#'
jne %get_token_alpha # Nope
# Deal with # line comments
call %purge_macro # Let it handle it
mov_[DWORD],eax &C # Set C
jmp %reset # Try again
:get_token_alpha
mov_eax,[DWORD] &C # Send C
mov_ebx, &alphas # Get alphanumerics
call %In_Set # See if in set
cmp_eax, !1 # IF TRUE
jne %get_token_symbol # Otherwise
# Store keywords
mov_eax,[DWORD] &C # Send C
call %preserve_keyword # Store
mov_[DWORD],eax &C # Set C
jmp %get_token_done # Be done with this token
:get_token_symbol
mov_eax,[DWORD] &C # Send C
mov_ebx, &symbols # Get symbols
call %In_Set # See if in set
cmp_eax, !1 # IF TRUE
jne %get_token_strings # Otherwise
# Store symbols
mov_eax,[DWORD] &C # Send C
call %preserve_symbol # Store
mov_[DWORD],eax &C # Set C
jmp %get_token_done # Be done with this token
:get_token_strings
mov_eax,[DWORD] &C # Send C
mov_ebx, &strings # Get strings
call %In_Set # See if in set
cmp_eax, !1 # IF TRUE
jne %get_token_comment # Otherwise
# Store String
mov_eax,[DWORD] &C # Send C
call %consume_word # Store
mov_[DWORD],eax &C # Set C
jmp %get_token_done # Be done with this token
:get_token_comment
mov_eax,[DWORD] &C # Send C
cmp_eax, !47 # IF '/' == C
jne %get_token_else # Otherwise
call %consume_byte # Hope it just is '/'
mov_[DWORD],eax &C # Set C
cmp_eax, !42 # IF '*' we have '/*'
jne %get_token_comment_line # Check for '//'
# Deal with /* block comments */
call %fgetc # get next C
mov_[DWORD],eax &C # Set C
:get_token_comment_block_outer
mov_eax,[DWORD] &C # Using C
cmp_eax, !47 # IF '/' == C (right after a '*')
je %get_token_comment_block_done # the comment is closed
:get_token_comment_block_inner
mov_eax,[DWORD] &C # Using C
cmp_eax, !-4 # EOF inside the comment?
je %get_token_abort # Unterminated comment: stop instead of spinning forever
cmp_eax, !42 # IF '*' == C
je %get_token_comment_block_iter # go look at what follows it
# Deal with inner loop
call %fgetc # get next C
mov_[DWORD],eax &C # Set C
jmp %get_token_comment_block_inner # keep going
:get_token_comment_block_iter
call %fgetc # get next C
mov_[DWORD],eax &C # Set C
jmp %get_token_comment_block_outer
:get_token_comment_block_done
call %fgetc # get next C
mov_[DWORD],eax &C # Set C
jmp %reset # throw away, try again
:get_token_comment_line
cmp_eax, !47 # IF '/' we have //
jne %get_token_done # keep if just '/'
# Deal with // line comment
call %purge_macro # Discard the rest of the line, same as a '#' line
mov_[DWORD],eax &C # Set C
jmp %reset # throw away, try again
:get_token_else
mov_eax,[DWORD] &C # Send C
call %consume_byte
mov_[DWORD],eax &C # Set C
:get_token_done
mov_eax,[DWORD] &Token # TOKEN
mov_[edx+BYTE],eax !4 # CURRENT->PREV = TOKEN
mov_[edx],eax # CURRENT->NEXT = TOKEN
mov_[DWORD],edx &Token # TOKEN = CURRENT
:get_token_abort
pop_edx # Restore EDX
pop_ecx # Restore ECX
pop_ebx # Restore EBX
mov_eax,[DWORD] &C # Return C
ret
# malloc function
# Bump allocator on top of the program break; nothing is ever freed
# Requires [MALLOC] to be initialized
# In:  EAX = number of bytes wanted
# Out: EAX = previous value of [MALLOC], i.e. the start of the new region
# The value returned by the brk syscall is not inspected
:malloc
push_ebx # Protect EBX
push_ecx # Protect ECX
push_edx # Protect EDX
mov_ebx,[DWORD] &MALLOC # current end of the heap
push_ebx # keep it: this is what we return
add_ebx,eax # end of the heap after this request
mov_[DWORD],ebx &MALLOC # record the new end
mov_eax, %45 # the Syscall # for SYS_BRK
int !0x80 # ask the kernel to move the break to EBX
pop_eax # Return pointer to the region just claimed
pop_edx # Restore EDX
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# clear_white_space function
# In:  EAX = INT C
# Out: EAX = first char that is not tab, line feed or space (or -4 on EOF)
:clear_white_space
cmp_eax, !9 # '\t' ?
je %clear_white_space_wipe # skip it
cmp_eax, !10 # '\n' ?
je %clear_white_space_wipe # skip it
cmp_eax, !32 # ' ' ?
jne %clear_white_space_done # anything else is returned as is
:clear_white_space_wipe
call %fgetc # fetch the following byte
cmp_eax, !-4 # EOF ends the scan
jne %clear_white_space # otherwise classify the new byte
:clear_white_space_done
ret
# In_Set function
# In:  EAX = Char C, EBX = CHAR* to a NUL terminated set
# Out: EAX = 1 if C occurs in the set, 0 otherwise
# EBX and ECX are preserved
# The match test runs before the terminator test, so C == 0 matches the
# terminating NUL and yields 1
:In_Set
push_ebx # Protect EBX
push_ecx # Protect ECX
:In_Set_loop
mov_cl,[ebx] # fetch the next member of the set
movzx_ecx,cl # widen it for the 32-bit compare
cmp_eax,ecx # same as C?
je %In_Set_True # found it
add_ebx, !1 # step to the following member
cmp_ecx, !0 # was that the terminator?
jne %In_Set_loop # no: keep scanning
:In_Set_False
pop_ecx # Restore ECX
pop_ebx # Restore EBX
mov_eax, %0 # Set FALSE
ret
:In_Set_True
pop_ecx # Restore ECX
pop_ebx # Restore EBX
mov_eax, %1 # Set True
ret
# Character sets handed to In_Set by the tokenizer
# Characters that make up keywords and identifiers
:alphas "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
# Characters collected into symbol tokens by preserve_symbol
:symbols "<=>|&!-"
# Raw hex bytes: 22 is the double quote, 27 the single quote, 00 the terminator
:strings '22 27 00'
# purge_macro function
# Receives CH in EAX (not examined; a fresh char is read first)
# Reads chars until a line feed or EOF is read
# Returns the line feed, or -4 when the input ended before one was seen
:purge_macro
call %fgetc # read next char
cmp_eax, !-4 # Check for EOF
je %purge_macro_done # A last line without '\n' must not loop forever
cmp_eax, !10 # Check for '\n'
jne %purge_macro # Keep going
:purge_macro_done
ret
# preserve_keyword function
# In:  EAX = INT C, the first char of the keyword
# Appends every consecutive char found in alphas to the current token string
# When the keyword is followed by ':' it is a goto label: fixup_label rewrites
# the token and a space is returned in place of the ':'
# Out: EAX = first char after the keyword (or ' ' for a label)
# ECX holds C while In_Set and consume_byte use EAX
:preserve_keyword
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_ebx, &alphas # the set we test against: "abc.."
mov_ecx,eax # keep C across the calls
:preserve_keyword_loop
call %In_Set # is C part of a keyword?
cmp_eax, !1 # IF TRUE
mov_eax,ecx # put C back in EAX (mov leaves the flags alone)
jne %preserve_keyword_label # keyword ended: look for a label
call %consume_byte # store C and fetch the next char
mov_ecx,eax # Update C
jmp %preserve_keyword_loop # keep looping
:preserve_keyword_label
cmp_eax, !58 # Check for ':'
jne %preserve_keyword_done # plain keyword: return C
# Fix our goto label
call %fixup_label # prepend ':' to the token string
mov_eax, %32 # Return Whitespace
:preserve_keyword_done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# preserve_symbol function
# In:  EAX = INT C, the first char of the symbol
# Appends every consecutive char found in symbols to the current token string
# Out: EAX = first char that is not part of the symbol
# ECX holds C while In_Set and consume_byte use EAX
:preserve_symbol
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_ebx, &symbols # the set we test against: "<=>.."
mov_ecx,eax # keep C across the calls
:preserve_symbol_loop
call %In_Set # is C a symbol char?
cmp_eax, !1 # IF TRUE
mov_eax,ecx # put C back in EAX (mov leaves the flags alone)
jne %preserve_symbol_done # symbol ended: return C
call %consume_byte # store C and fetch the next char
mov_ecx,eax # Update C
jmp %preserve_symbol_loop # keep looping
:preserve_symbol_done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# consume_word function
# receives INT C in EAX (the opening quote character)
# Stores the quoted text, opening quote included, in the current token string;
# a quote preceded by an unescaped '\' does not end the text
# returns INT C in EAX: the char after the closing quote, or -4 when the
# input ends before the closing quote
# Uses EAX for C, EBX for FREQ and ECX for ESCAPE
:consume_word
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_ebx,eax # FREQ = C
mov_ecx, %0 # ESCAPE = FALSE
:consume_word_loop
cmp_ecx, !0 # IF !ESCAPE
jne %consume_word_escape # Previous char was '\': this one is taken literally
cmp_eax, !92 # if '\\'
jne %consume_word_iter # keep state
mov_ecx, %1 # ESCAPE = TRUE
jmp %consume_word_iter # keep going
:consume_word_escape
mov_ecx, %0 # ESCAPE = FALSE
:consume_word_iter
call %consume_byte # store C, read next char
cmp_eax, !-4 # Check for EOF
je %consume_word_done # Unterminated literal: stop instead of storing EOF forever
cmp_ecx, !0 # IF ESCAPE
jne %consume_word_loop # keep looping
cmp_eax,ebx # IF C != FREQ
jne %consume_word_loop # keep going
call %fgetc # return next char
:consume_word_done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# consume_byte function
# In:  EAX = INT C
# Writes the low byte of C at [string_index] and advances string_index by one
# Out: EAX = next char read from the input
:consume_byte
push_ebx # Protect EBX
mov_ebx,[DWORD] &string_index # where the next char of S goes
mov_[ebx],al # S[0] = C
add_ebx, !1 # S = S + 1
mov_[DWORD],ebx &string_index # remember the new position
pop_ebx # Restore EBX
call %fgetc # fetch the char that follows
ret
# fixup_label function
# In:  EDX = CURRENT token node; the string is taken from CURRENT->S ([EDX+8])
# Shifts the string one byte to the right and puts ':' in front of it
# All registers are returned unchanged
# Uses EAX for HOLD, EBX for PREV and ECX as the cursor into the string
:fixup_label
push_eax # Protect EAX
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_ecx,[edx+BYTE] !8 # cursor = &HOLD_STRING[0]
mov_eax, %58 # HOLD = ':'
:fixup_label_loop
mov_ebx,eax # PREV = HOLD
mov_al,[ecx] # HOLD = HOLD_STRING[I]
mov_[ecx],bl # HOLD_STRING[I] = PREV
movzx_eax,al # widen HOLD for the compare below
add_ecx, !1 # I = I + 1
cmp_eax, !0 # stop once the old terminator has been picked up
jne %fixup_label_loop # Keep looping
pop_ecx # Restore ECX
pop_ebx # Restore EBX
pop_eax # Restore EAX
ret
# fgetc function
# Loads FILE* from [INPUT_FILE]
# Reads a single byte through the read syscall, using a stack slot as buffer
# Returns -4 (EOF) or char in EAX
# EOF is decided by the syscall result, not by the buffer contents, so an
# input byte of 0xFC is returned as 252 rather than being mistaken for -4
:fgetc
push_ebx # Protect EBX
push_ecx # Protect ECX
push_edx # Protect EDX
mov_eax, %-4 # Put EOF in eax
push_eax # Buffer starts out as EOF (If nothing read, value will remain EOF)
lea_ecx,[esp] # Get stack address
mov_ebx,[DWORD] &Input_file # Where are we reading from
mov_eax, %3 # the syscall number for read
mov_edx, %1 # set the size of chars we want
int !0x80 # call the Kernel
cmp_eax, !1 # Did we get exactly one byte?
pop_eax # Get either char or EOF (pop leaves the flags alone)
jne %fgetc_done # Nothing read: EAX still holds EOF
movzx_eax,al # Make it useful
:fgetc_done
pop_edx # Restore EDX
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# Reverse_List function
# In:  EAX = head of a list linked through the NEXT field at offset 0
# Out: EAX = head of the same nodes in reverse order (0 for an empty list)
# Only the NEXT fields are rewritten
# Uses EBX for HEAD, EAX for ROOT and ECX for NEXT
:Reverse_List
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_ebx,eax # HEAD = list
mov_eax, %0 # ROOT = NULL
jmp %Reverse_List_Loop # test HEAD before touching any node
:Reverse_List_Step
mov_ecx,[ebx] # NEXT = HEAD->NEXT
mov_[ebx],eax # HEAD->NEXT = ROOT
mov_eax,ebx # ROOT = HEAD
mov_ebx,ecx # HEAD = NEXT
:Reverse_List_Loop
cmp_ebx, !0 # WHILE HEAD != NULL
jne %Reverse_List_Step # relink one more node
:Reverse_List_Done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# recursive_output function
# In:  EAX = list head (may be NULL)
# Recurses to the end of the list first, then prints I->S on the way back,
# so the nodes come out last-to-first
# Uses EBX for I
:recursive_output
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_ebx,eax # I = Head
cmp_eax, !0 # an empty list prints nothing
je %recursive_output_done # Skip the work
mov_eax,[ebx] # I->NEXT
call %recursive_output # print everything behind this node first
mov_eax,[ebx+BYTE] !8 # I->S
call %File_Print # then this node's text
:recursive_output_done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# File_Print function
# In:  EAX = CHAR* (a NULL pointer is accepted and prints nothing)
# Sends every char up to, but not including, the terminating NUL to fputc
# Uses EBX as the cursor into the string
:File_Print
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_ebx,eax # cursor = S
cmp_ebx, !0 # NULL pointer?
je %File_Print_Done # Simply don't try to print them
:File_Print_Loop
mov_al,[ebx] # fetch the char under the cursor
movzx_eax,al # zero extend
add_ebx, !1 # cursor = cursor + 1
cmp_eax, !0 # Check for NULL
je %File_Print_Done # Stop at NULL
call %fputc # write it
jmp %File_Print_Loop # Keep going
:File_Print_Done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# fputc function
# In:  EAX = CHAR to write; the descriptor is loaded from [OUTPUT_FILE]
# Writes the low byte of EAX with the write syscall
# All registers, EAX included, are restored before returning
:fputc
push_ebx # Protect EBX
push_ecx # Protect ECX
push_edx # Protect EDX
push_eax # the char now sits at [ESP]
lea_ecx,[esp] # buffer = address of that stack slot
mov_edx, %1 # write a single byte
mov_eax, %4 # the syscall number for write
mov_ebx,[DWORD] &Output_file # Write to target file
int !0x80 # call the Kernel
pop_eax # drop the buffer, restoring EAX
pop_edx # Restore EDX
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# program function
# receives nothing, returns nothing
# Walks global_token until it is NULL and handles three top-level forms:
#   CONSTANT name value   -> new entry on global_constant_list
#   type name ;           -> new entry on global_symbol_list plus a ":GLOBAL_name" definition on globals_list
#   type name (           -> handed to declare_function
# Anything else stops the walk (see program_error)
# match, sym_declare, type_name and emit are defined outside this section;
# judging by the tests below, match leaves 0 in EAX when the strings are equal
# Uses EAX for type_size
:program
# The binary initialized the globals to null, so we can skip those steps
push_ebx # Protect EBX
push_ecx # Protect ECX
:new_type
mov_eax,[DWORD] &global_token # Using global_token
cmp_eax, !0 # Check if NULL
je %program_done # Be done if null
mov_ebx,[eax+BYTE] !8 # GLOBAL_TOKEN->S
mov_eax, &constant # "CONSTANT"
call %match # IF GLOBAL_TOKEN->S == "CONSTANT"
cmp_eax, !0 # If true
jne %program_else # Looks like not a constant
# Deal with minimal constant case
mov_eax,[DWORD] &global_token # Using global_token
mov_eax,[eax] # global_token->next
mov_[DWORD],eax &global_token # global_token = global_token->next (the constant's name)
mov_eax,[eax+BYTE] !8 # global_token->S
mov_ebx, %0 # NULL
mov_ecx,[DWORD] &global_constant_list # global_constant_list
call %sym_declare # Declare that constant
mov_[DWORD],eax &global_constant_list # global_constant_list = sym_declare(global_token->s, NULL, global_constant_list);
mov_ebx,[DWORD] &global_token # Using global_token
mov_ebx,[ebx] # global_token->next (the constant's value token)
mov_[eax+BYTE],ebx !16 # global_constant_list->arguments = global_token->next
mov_ebx,[ebx] # global_token->next->next
mov_[DWORD],ebx &global_token # global_token = global_token->next->next
jmp %new_type # go around again
:program_else
call %type_name # Figure out the type_size
cmp_eax, !0 # IF NULL == type_size
je %new_type # it was a new type
# Add to global symbol table
mov_ebx,eax # put type_size in the right spot
mov_eax,[DWORD] &global_token # Using global token
mov_eax,[eax+BYTE] !8 # global_token->S
mov_ecx,[DWORD] &global_symbol_list # Using global_symbol_list
call %sym_declare # Declare symbol
mov_[DWORD],eax &global_symbol_list # global_symbol_list = sym_declare(global_token->s, type_size, global_symbol_list);
mov_ebx,[DWORD] &global_token # Using global token
mov_ebx,[ebx] # global_token->next
mov_[DWORD],ebx &global_token # global_token = global_token->next
mov_ebx,[DWORD] &global_token # Using global token
mov_ebx,[ebx+BYTE] !8 # global_token->S
mov_eax, &semicolon # ";"
call %match # if(match(";", global_token->s))
cmp_eax, !0 # If true
jne %program_function # looks like not a match
# Deal with the global variable
mov_ebx,[DWORD] &globals_list # Using globals_list
mov_eax, &program_string_0 # ":GLOBAL_"
call %emit # Emit it
mov_ebx,eax # update globals_list
mov_eax,[DWORD] &global_token # Using global token
mov_eax,[eax+BYTE] !4 # global token->prev (the variable's name token)
mov_eax,[eax+BYTE] !8 # global token->prev->s
call %emit # Emit it
mov_ebx,eax # update globals_list
mov_eax, &program_string_1 # "\nNULL\n"
call %emit # Emit it
mov_[DWORD],eax &globals_list # update globals_list
mov_eax,[DWORD] &global_token # Using global token
mov_eax,[eax] # global_token->next
mov_[DWORD],eax &global_token # global_token = global_token->next
jmp %new_type # go around again
:program_function
mov_ebx,[DWORD] &global_token # Using global token
mov_ebx,[ebx+BYTE] !8 # global_token->S
mov_eax, &open_paren # "("
call %match # if(match("(", global_token->s))
cmp_eax, !0 # If true
jne %program_error # Otherwise deal with error case
# Deal with function definition
call %declare_function # Lets get the parsing rolling
jmp %new_type # Keep looping through functions
:program_error
# Deal with the case of something we don't support
# NOT IMPLEMENTED
# NOTE(review): control simply falls into program_done, so an unsupported
# construct ends the walk early without any message
:program_done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# Strings needed by the program function
# Prefix of the label emitted for each global variable
:program_string_0 ":GLOBAL_"
# Ends the label line and supplies the NULL initial value
:program_string_1 "
NULL
"
# declare_function function
# Receives nothing and returns nothing
# Expects global_token to sit on the "(" that follows the function name
# Resets current_count, declares the function (named by global_token->prev->s)
# on global_function_list and records it in [function], then collects its arguments
# A ";" after the arguments marks a prototype and nothing is emitted
# Otherwise the ":FUNCTION_name" label and the body are emitted, followed by
# "ret\n" unless the most recent output entry already is "ret\n"
# sym_declare, match, emit_out and statement are defined outside this section
:declare_function
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_eax, %0 # Using NULL
mov_[DWORD],eax &current_count # current_count = 0
mov_eax,[DWORD] &global_token # Using global token
mov_eax,[eax+BYTE] !4 # global token->prev
mov_eax,[eax+BYTE] !8 # global token->prev->s
mov_ebx, %0 # NULL
mov_ecx,[DWORD] &global_function_list # global_function_list
call %sym_declare # sym_declare(global_token->prev->s, NULL, global_function_list);
mov_[DWORD],eax &function # function = sym_declare(global_token->prev->s, NULL, global_function_list);
mov_[DWORD],eax &global_function_list # global_function_list = function
call %collect_arguments # collect all of the function arguments
mov_eax,[DWORD] &global_token # Using global token
mov_eax,[eax+BYTE] !8 # global token->s
mov_ebx, &semicolon # ";"
call %match # IF global token->s == ";"
cmp_eax, !0 # If true it was a prototype
jne %declare_function_full # Otherwise a body follows
# Deal with prototypes
mov_eax,[DWORD] &global_token # Using global token
mov_eax,[eax] # global token->next
mov_[DWORD],eax &global_token # global token = global token->next
jmp %declare_function_done # Move on
:declare_function_full
# Deal with full function definitions
mov_eax, &declare_function_string_0 # "# Defining function "
call %emit_out # emit it
mov_eax,[DWORD] &function # function
mov_eax,[eax+BYTE] !8 # function->s
call %emit_out # emit it
mov_eax, &declare_function_string_1 # "\n:FUNCTION_"
call %emit_out # emit it
mov_eax,[DWORD] &function # function
mov_eax,[eax+BYTE] !8 # function->s
call %emit_out # emit it
mov_eax, &declare_function_string_3 # "\n"
call %emit_out # emit it
call %statement # Recursively get the function pieces
mov_eax,[DWORD] &output_list # output
mov_eax,[eax+BYTE] !8 # output->s
mov_ebx, &declare_function_string_2 # "ret\n"
call %match # IF output->s == "ret\n"
cmp_eax, !0 # If true we can skip adding it
je %declare_function_done # otherwise we need to add it
# Add the return to the end of a function lacking a return;
mov_eax, &declare_function_string_2 # "ret\n"
call %emit_out # emit it
:declare_function_done
pop_ecx # Restore ECX
pop_ebx # Restore EBX
ret
# Strings needed by the declare_function function
# Comment line emitted ahead of each function body
:declare_function_string_0 "# Defining function "
# Ends the comment line and starts the function's label
:declare_function_string_1 "
:FUNCTION_"
# Return instruction appended when the body does not already end with it
:declare_function_string_2 "ret
"
# Line feed that ends the label line
:declare_function_string_3 "
"
# collect_arguments function
# Receives nothing
# Returns Nothing
# Adds arguments to the function definition
# holds struct type* type_size in ECX, then replace with struct token_list* a in ECX when type_size is used
:collect_arguments
push_ebx # Protect EBX
push_ecx # Protect ECX
mov_eax,[DWORD] &global_token # Using global_token
mov_eax,[eax] # global_token->next
mov_[DWORD],eax &global_token # global_token = global_token->next
:collect_arguments_loop
mov_ebx,[DWORD] &global_token # Using global_token
mov_ebx,[ebx+BYTE] !8 # global_token->S
mov_eax, &close_paren # ")"
call %match # IF global_token->S == ")"
cmp_eax, !0 # we reached the end
je %collect_arguments_done # be done
# deal with the case of there are arguments
call %type_name # Get the type
mov_ecx,eax # put type_size safely out of the way
mov_ebx,[DWORD] &global_token # Using global_token
mov_ebx,[ebx+BYTE] !8 # global_token->S
mov_eax, &close_paren # ")"
call %match # IF global_token->S == ")"
cmp_eax, !0 # is a foo(int, char,void) case
je %collect_arguments_common # deal with commas
# Trying second else
mov_ebx,[DWORD] &global_token # Using global_token
mov_ebx,[ebx+BYTE] !8 # global_token->S
mov_eax, &comma # ","
call %match # IF global_token->S == ","
cmp_eax, !0 # then deal with the common
je %collect_arguments_common # case of commas between arguments
# deal with foo(int a, char b)
mov_eax,[DWORD] &global_token # Using global_token
mov_eax,[eax+BYTE] !8 # global_token->S
mov_ebx,ecx # put type_size in the right place
mov_ecx,[DWORD] &function # Using function
mov_ecx,[ecx+BYTE] !16 # function->args
call %sym_declare # sym_declare(global_token->s, type_size, function->arguments);
mov_ecx,eax # put a in a safe place
mov_eax,[DWORD] &function # Using function
mov_eax,[eax+BYTE] !16 # function->args
cmp_eax, !0 # IF function->args == NULL
jne %collect_arguments_another # otherwise it isn't the first
# Deal with the case of first argument in the function
mov_eax, %-4 # -4
mov_[ecx+BYTE],eax !16 # a->depth = -4
jmp %collect_arguments_next # get to next
:collect_arguments_another
# deal with the case of non-first arguments
mov_eax,[DWORD] &function # Using function
mov_eax,[eax+BYTE] !16 # function->args
mov_eax,[eax+BYTE] !16 # function->args->depth
sub_eax, !4 # function->args->depth - 4
mov_[ecx+BYTE],eax !16 # a->depth = function->args->depth - 4