-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpalanqin.asm
2427 lines (2174 loc) · 65.2 KB
/
palanqin.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
; Palanquin -- Cortex-M0 emulator for 8086
; Copyright (c) 2020, 2021, 2023 Robert Clausecker <[email protected]>
cpu 8086 ; restrict nasm to 8086 instructions
bits 16 ; generate 16 bit code
; symbols provided by the linker script
extern end, edata, etext, bsswords
section .data
; copyright notice printed by start. ident carries no terminator of its
; own: the bytes run on into crlf, whose trailing 0 ends the string.
ident db "Palanqin 0.3 (c) 2020, 2021, 2023 "
db "Robert Clausecker <[email protected]>"
crlf db 13, 10, 0 ; CR, LF, NUL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Parameters ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; start places SP at end+stack and loads the image at that same offset,
; so the emulator stack is the `stack` bytes directly behind `end`
stack equ 0x100 ; emulator stack size in bytes (multiple of 16)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Macros and Constants ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; low and high registers.
; rlo(r) is the memory operand for the low 16 bit half of ARM register r
; inside the state structure at BP; rhi(r) is the high half, hi bytes
; further up.
; no parentheses so we can use segment overrides if desired
%define rlo(r) [r*4+bp+regs]
%define rhi(r) [r*4+bp+regs+hi]
; functionality not implemented: raise a breakpoint interrupt,
; then return to the caller
%macro todo 0
int3
ret
%endmacro
; compare DI with [bp+zsreg]. If both are equal, fix the flags.
; Trashes AX, preserves all other registers.
; the intent is to save the flags if Rd == [bp+zsreg] and flag
; recovery would otherwise be impossible.
; a fixrest macro must be placed under the same label:
; fixRd branches to the local label .fixRd, which fixrest defines, and
; fixrest resumes at the local label .endfixRd, which fixRd defines.
%macro fixRd 0
cmp di, [bp+zsreg] ; is Rd == [bp+zsreg]?
je .fixRd ; yes: take the out-of-line path emitted by fixrest
.endfixRd: ; used by fixrest
%endmacro
; out-of-line half of fixRd: enter fixflags.entry with .endfixRd pushed
; as its return address
%macro fixrest 0
.fixRd: mov ax, .endfixRd ; return address
push ax ; for fixflags.entry
jmp fixflags.entry ; which we call by means of a jump
%endmacro
; load one instruction into AX and advance PC past it.
; trashes BX, CX, and SI. Assumes the PC cache is set up correctly.
; The accessor in [bp+pcldrh] is called with ES = [bp+pcseg], SI = PC
; offset and CX = previous ES.
%macro ifetch 0
mov si, rlo(15) ; load offset
dec si ; clear thumb bit
mov cx, es ; remember old ES
mov es, [bp+pcseg] ; load translated PC segment
call [bp+pcldrh] ; load instruction from memory (sets ES=CX)
add word rlo(15), 2 ; PC += 2
jnc %%nofix ; fix PC cache if rhi(15) changes
call ifetchtail ; low PC half wrapped around: slow path
%%nofix:
%endmacro
; align to an even address, use 0xcc for padding
; (0xcc is the int3 opcode). Only use this where execution cannot fall
; through the padding; fallthrough sites in this file use plain align 2.
%macro aligncc 0
align 2, int3
%endmacro
; 8086 flags (those we find useful)
; bit masks within the 16 bit FLAGS word; CF, ZF and SF sit in the low
; byte (the part LAHF/SAHF transfer), OF in the high byte
CF equ 0x0001
ZF equ 0x0040
SF equ 0x0080
OF equ 0x0800
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Startup and Initialisation ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
section .text
global start
; Program entry point.
; Moves the stack to end+stack, prints the copyright notice, clears .bss,
; initialises the memory maps, loads the image named on the command line
; right behind the emulator stack, sets up R0, R1, SP and PC and calls
; run.  Terminates with error level 1 if no file name was given or an I/O
; error occurred; should run return, the low byte of R0 becomes the error
; level.
; The command tail at 0x80 and PSP word 2 are addressed through DS and
; .bss is cleared through ES, so the code presumably relies on the usual
; .COM entry state DS = ES = CS = PSP segment.
	; relocate the stack
start:	mov	sp, end+stack		; beginning of stack

	; set up bp with emulator state
	mov	bp, state

	; print copyright notice
	mov	si, ident
	call	puts

	; initialise .bss
	xor	ax, ax
	mov	di, edata
	mov	cx, bsswords		; .bss section length in words
	rep	stosw			; clear .bss

	; configure emulator base address
	mov	dx, sp			; an address just past the end of the memory
	mov	cl, 4
	shr	dx, cl			; convert to paragraph count
	mov	ax, cs
	add	ax, dx			; emulator image base address

	; initialise memory maps
	lea	di, [bp+mmadj]		; initialise adjusted memory tables
	call	mminit			; to be relative to the image base address
	lea	di, [bp+mmraw]		; initialise raw memory tables
	xor	ax, ax			; to base segment 0
	call	mminit

	; terminate argument vector
	mov	di, 0x80		; argument vector length pointer
	xor	bx, bx			; clear bx
	mov	bl, [di]		; load argument vector length
	inc	di			; beginning of arguments
	mov	[bx+di], bh		; NUL-terminate arguments
	mov	al, 0x20		; AL = ' '
	mov	cx, bx			; max length: argument vector length
	jcxz	.noarg			; empty command tail: nothing to scan
	repe	scasb			; find first non-space
	je	.noarg			; ran out of characters on a blank:
					; the tail holds nothing but blanks
	dec	di			; go back to first non-space
	mov	[file], di		; remember file name for later
	cmp	byte [di], bh		; was there any argument at all?
	jne	.0

	; no argument was given: print usage and exit
.noarg:	mov	si, usage
	call	puts			; print usage
.die:	mov	ax, 0x4c01		; error level 1 (failure)
	int	0x21			; 0x4c: TERMINATE PROGRAM

	; an argument was given: try to open it
.0:	mov	dx, di			; file name
	mov	ax, 0x3d00		; AL=00 (open file for reading)
	int	0x21			; 0x3d: OPEN EXISTING FILE
	jnc	.1			; did an error occur?

	; opening, reading, or closing the file failed
.err:	push	cs
	pop	ds			; set DS = CS
	mov	si, [file]		; load file name for error message
	call	perror			; print error message
	jmp	.die			; and die

	; opening the file was succesful: load program image
.1:	xchg	bx, ax			; the file handle is needed in BX
	mov	ax, sp
	mov	dx, cs			; DX:AX = image base

	; read until EOF
	; TODO: reject image if it is too large
.2:	test	ax, ax			; check if AX >= 0x8000 to make sure that
	jns	.3			; 0x8000 bytes remain in DX:AX
	sub	ax, 0x8000		; if not, shift segments to ensure this:
	add	dh, 0x08		; 0x8000 bytes are 0x0800 paragraphs
.3:	mov	ds, dx
	xchg	dx, ax			; buffer address at DS:DX
	mov	cx, 0x8000		; number of bytes to read
	mov	ah, 0x3f
	int	0x21			; 0x3f: READ FROM FILE VIA HANDLE
	jc	.err			; IO error?
	test	ax, ax			; end of file reached?
	jz	.eof
	add	ax, dx			; compute new base address
	mov	dx, ds			; move buffer address to DX:AX
	jmp	.2			; and read some more data

	; close the image file
.eof:	mov	ah, 0x3e		; BX still contains the handle here
	int	0x21			; 0x3e: CLOSE A FILE HANDLE
	jc	.err

	; restore DS = CS
	push	cs
	pop	ds

	; initial register set up: image base address (R0)
	mov	bx, [bp+imgbase]	; load image base
	mov	dx, bx
	xor	ax, ax			; DX:AX contains the image base
	call	seglin			; as a linear address
	mov	rlo(0), ax		; write load address to R0
	mov	rhi(0), dx

	; initial register set up: memory size (R1)
	mov	dx, [2]			; load first segment past program image from PSP
	sub	dx, bx			; compute number of paragraphs available
	xor	ax, ax			; to the program
	call	seglin			; and convert to a linear address
	mov	rlo(1), ax		; write memory size to R1
	mov	rhi(1), dx

	; initial register set up: stack pointer and reset vector
	mov	ds, bx			; load DS with emulated address space
	xor	si, si			; vector table begin
	lea	di, rlo(13)		; DI = &SP
	movsw				; load initial SP, low half
	movsw				; load initial SP, high half
	add	di, 4			; advance past LR
	movsw				; load initial PC (reset vector), low half
	movsw				; load initial PC (reset vector), high half

	call	run			; emulate a Cortex M0

	mov	al, rlo(0)		; load error level from R0
	mov	ah, 0x4c
	int	0x21			; 0x4c: TERMINATE PROGRAM

section .data
usage	db	"Usage: PALANQIN CORTEXM0.IMG", 0

section .bss
	align	2
file	resw	1			; image file name
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emulator State ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; the emulator state structure
; all members are addressed relative to BP, which start loads with the
; address of `state`
struc st
regs resd 16 ; ARM registers
hi equ 2 ; advances low register half to high half
flags resw 1 ; CPU flags in 8086 format
; only CF, ZF, SF, and OF are meaningful
zsreg resw 1 ; pointer to the register according to which the
; zero and sign flags shall be set or 0 if they
; are already set up correctly. This is R0--R7.
; instruction decoding state variables
; immediate operands are zero/sign-extended to 16 bit
; register operands are represented by a pointer to the appropriate
; regs array member
; the decode handlers fill these in the order oprC, oprB, oprA by
; successive stosw, so the order of these three members must not change
oprC resw 1 ; third operand (towards least significant bit)
oprB resw 1 ; second operand (middle of the instruction)
oprA resw 1 ; first operand (towards most significant bit)
; PC decoding cache
; this cache is used by ifetch to fetch code, avoiding the need to
; translate PC unless the high word changes
pchi resw 1 ; high PC word currently translated
pcseg resw 1 ; translated segment corresponding to pchi
pcldrh resw 1 ; ldrh accessor function for pchi
; Memory maps. The high word of an ARM address sans the address space
; nibble is looked up in this table to form a segment. The low word
; stays the same, forming an offset.
mmsize equ 16 ; number of entries in a memory map
mmraw resw mmsize ; memory maps for unadjusted memory
imgbase equ $ ; image base address (first seg. of adj. table)
mmadj resw mmsize ; memory maps for adjusted memory
endstruc
section .bss
alignb 4
state resb st_size ; BP points here
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Instruction Simulation ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
section .text
; run the emulation until we need to stop for some reason
; Sets DS = ES = CS, primes the PC cache and then calls step in an
; endless loop; nothing in this routine itself leaves that loop.
run: push cs ; set up es = ds = cs
push cs
pop ds
pop es
call fixPC ; set up PC cache
.step: call step ; simulate one instruction
jmp .step ; do it again and again
; simulate one instruction. Assumes ES=DS=CS.
; Fetches the instruction, pushes it for the decode handler to pop later
; and dispatches on its top four bits through dtXXXX.
; State handed to the decode handler:
;   AX = CX = instruction, BX = top four bits * 2 (table offset),
;   DX = 0x1c, SI = &regs[0], DI = &oprC, top of stack = instruction
aligncc
step: ifetch ; fetch instruction
push ax ; push a copy of the current instruction
mov bx, ax ; work on a second copy in BX, AX keeps the instruction
mov cl, 5 ; rotate the instruction's top 4 bits into bits 4..1
rol bx, cl ; and form a table offset
and bx, 0x1e ; bx = ([insn] & 0xf000) >> (16 - 4) << 1
mov dx, 0x1c ; mask for use with the decode handlers
lea si, [bp+regs] ; for use with the decode handlers
lea di, [bp+oprC] ; for use with the decode handlers
mov cx, ax ; for use with the decode handlers
; which also assume that AX=insn
jmp [dtXXXX+bx] ; decode operands
section .data
align 2, db 0
; decoder jump table: decode the operands according
; to the top 4 instruction bits
; the decode handlers decode the various instruction fields and store
; their contents into the emulator state variables.
; step indexes this table with BX = top four instruction bits * 2, so
; there is one word per value of those bits (16 entries); htXXXX uses
; the same index.
dtXXXX: dw imm5rr ; 000XX imm5 / Rm / Rd
dw d0001 ; 000110 Rm / Rn / Rd
; 000111 imm3 / Rn / Rd
dw rimm8 ; 001XX Rdn / imm8
dw rimm8
dw d0100 ; 010000 Rm / Rdn
; 010001 DN / rm / Rdn
; 01001 Rd / imm8
dw rrr ; 0101 Rm / Rn / Rd
dw imm5rr ; 011XX imm5 / Rn / Rd
dw imm5rr
dw imm5rr ; 1000X imm5 / Rn / Rd
dw rimm8 ; 1001X Rd / imm8
dw rimm8 ; 1010X Rd / imm8
dw d1011 ; 1011 misc. instructions
dw rimm8 ; 1100X Rn / imm8
dw d1101 ; 1101 cond / imm8
dw d1110 ; 11100 imm11
; 11101 32 bit instructions
dw d1111 ; 1111 32 bit instructions
section .text
; special decode handler for instructions starting with 0001
; 000XX... where XX != 11 is decoded as imm5 / reg / reg,
; 000110... as reg / reg / reg and
; 000111 as imm5 / reg / reg again
; Entry state for all decode handlers is the one step sets up:
; AX = CX = instruction, DX = 0x1c, SI = &regs[0], DI = &oprC,
; BX = table offset, instruction on top of the stack.
aligncc
d0001: and ch, 0xc ; mask the two bits 00001100 (instruction bits 11, 10)
cmp ch, 0x08 ; are these set to 10?
mov cx, ax ; restore AX copy
je rrr ; if yes, decode as rrr
; else fall through and decode as imm5rr
; decode handler for imm5 / reg / reg
; instruction layout: XXXX XAAA AABB BCCC
; for d000, we don't treat the 00011 opcodes specially;
; the handler for these instructions must manually decode the
; register from oprA
; Result: oprC = &regs[C], oprB = &regs[B], oprA = A (5 bit value),
; AX = instruction (popped); continues at [htXXXX+bx].
; plain align 2 (not aligncc): d0001 falls through this padding
align 2
imm5rr: shl ax, 1 ; AX = XXXX AAAA ABBB CCC0
shl ax, 1 ; AX = XXXA AAAA BBBC CC00
mov ch, ah ; CX = XXXA AAAA **BB BCCC
and ax, dx ; AX = 0000 0000 000C CC00
add ax, si ; AX = &regs[C]
stosw ; oprC = &regs[C]
shr cl, 1 ; CX = XXXA AAAA 0**B BBCC
mov ax, cx
and ax, dx ; AX = 0000 0000 000B BB00
add ax, si ; AX == &regs[B]
stosw ; oprB = &regs[B]
mov al, ch ; AL = XXXA AAAA
and ax, 0x1f ; AX = 0000 0000 000A AAAA
stosw ; oprA = A
pop ax ; the current instruction
jmp [htXXXX+bx] ; execute behaviour
; decode handler for reg / imm8
; instruction layout: XXXX XBBB CCCC CCCC
; Result: oprC = #imm8 (zero-extended), oprB = &regs[B], oprA untouched,
; AX = instruction (popped); continues at [htXXXX+bx].
; TODO: oprC isn't actually needed anywhere. Can we skip setting it up?
; NOTE(review): h0100.ldr does read #imm8 back from oprC, so the store
; cannot be dropped without changing that handler as well.
aligncc
rimm8: xor cx, cx
xchg ah, cl ; AX=imm8, CX=reg
stosw ; oprC=imm8
xchg ax, cx ; AX=reg
shl ax, 1 ; AX = XXXX XXXX XXXX BBB0
shl ax, 1 ; AX = XXXX XXXX XXXB BB00
and ax, dx ; AX = 0000 0000 000B BB00
add ax, si ; AX = &regs[B]
stosw ; oprB = &regs[B]
pop ax ; the current instruction
jmp [htXXXX+bx] ; execute behaviour
; decode handler for reg / reg / reg
; instruction layout: XXXXXXXAAABBBCCC
; Result: oprC = &regs[C], oprB = &regs[B], oprA = &regs[A],
; AX = instruction (popped); continues at [htXXXX+bx].
aligncc
rrr: shl ax, 1 ; AX = XXXX XXAA ABBB CCC0
shl ax, 1 ; AX = XXXX XAAA BBBC CC00
and ax, dx ; AX = 0000 0000 000C CC00
add ax, si ; AX = &regs[C]
stosw ; oprC = &regs[C]
shr cx, 1 ; CX = 0XXX XXXX AAAB BBCC
mov ax, cx
and ax, dx ; AX = 0000 0000 000B BB00
add ax, si ; AX = &regs[B]
stosw ; oprB = &regs[B]
xchg ax, cx
mov cl, 3 ; prepare shift amount
shr ax, cl ; AX = 0000 XXXX 000A AABB
and ax, dx ; AX = 0000 0000 000A AA00
add ax, si ; AX = &regs[A]
stosw ; oprA = &regs[A]
pop ax ; the current instruction
jmp [htXXXX+bx] ; execute behaviour
section .data
align 2, db 0
; first level handler jump table: decode the top 4 instruction bits
; the decode handlers end with jmp [htXXXX+bx], where BX is still the
; offset step computed (top four instruction bits * 2); the entries
; therefore parallel those of dtXXXX one to one.
htXXXX dw h000 ; 000XX shift immediate
dw h000 ; 0001X ASRS immediate, add/subtract register/immediate
dw h001 ; 001XX add/subtract/compare/move immediate
dw h001
dw h0100 ; 010000XXXX data-processing register
; 010001XX special data processing
; 01001 LDR (literal pool)
dw h0101 ; 0101 load/store register offset
dw h011 ; 011XX load/store word/byte immediate offset
dw h011
dw h1000 ; 1000X load/store halfword immediate offset
dw h1001 ; 1001X load from/store to stack
dw h1010 ; 1010X add to SP or PC
dw h1011 ; 1011XXXX miscellaneous instructions
dw h1100 ; 1100X load/store multiple
dw h1101 ; 1101XXXX conditional branch
; 11011110 undefined instruction
; 11011111 service call
dw h1110 ; 11100 B (unconditional branch)
dw h1111 ; 11110 branch and misc. control
; Jump table for instructions 000XXX interleaved with the
; jump table for add/subtract/compare/move immediate.
; Both h000 and h001 index with BX = XX * 4 (instruction bits 12..11);
; ht001XX starts one word after ht000XX, so the even words belong to
; the 000XX handlers and the odd words to the 001XX handlers.
ht000XX dw h00000 ; LSLS Rd, Rm, #imm5
ht001XX dw h00100 ; MOVS Rd, #imm8
dw h00001 ; LSRS Rd, Rm, #imm5
dw h00101 ; CMP Rd, #imm8
dw h00010 ; ASRS Rd, Rm, #imm5
dw h00110 ; ADDS Rd, #imm8
dw h00011 ; ADD/SUB register/immediate
dw h00111 ; SUBS Rd, #imm8
; jump table for data-processing register instructions
; h0100 indexes this table with BX = oprA * 2, oprA being the four bit
; opcode field (instruction bits 9..6)
ht010000XXXX:
dw h0100000000 ; ANDS Rdn, Rm
dw h0100000001 ; EORS Rdn, Rm
dw h0100000010 ; LSLS Rdn, Rm
dw h0100000011 ; LSRS Rdn, Rm
dw h0100000100 ; ASRS Rdn, Rm
dw h0100000101 ; ADCS Rdn, Rm
dw h0100000110 ; SBCS Rdn, Rm
dw h0100000111 ; RORS Rdn, Rm
dw h0100001000 ; TST Rn, Rm
dw h0100001001 ; RSBS Rdn, Rm, #0
dw h0100001010 ; CMP Rn, Rm
dw h0100001011 ; CMN Rn, Rm
dw h0100001100 ; ORRS Rdn, Rm
dw h0100001101 ; MULS Rdn, Rm
dw h0100001110 ; BICS Rdn, Rm
dw h0100001111 ; MVNS Rdn, Rm
; jump table for special data-processing instructions
; h0100.sdp indexes this table with BX = (instruction bits 9..8) * 2
ht010001XX:
dw h01000100 ; ADD Rdn, Rm
dw h01000101 ; CMP Rd, Rm
dw h01000110 ; MOV Rd, Rm
dw h01000111 ; BX Rm, BLX Rm
; jump table for load/store register offset
; one entry per value of the three opcode bits following 0101
; (the code that indexes this table is not part of this excerpt)
ht0101 dw str ; 0101000 STR Rt, [Rn, Rm]
dw strh ; 0101001 STRH Rt, [Rn, Rm]
dw strb ; 0101010 STRB Rt, [Rn, Rm]
dw ldrsb ; 0101011 LDRSB Rt, [Rn, Rm]
dw ldr ; 0101100 LDR Rt, [Rn, Rm]
dw ldrh ; 0101101 LDRH Rt, [Rn, Rm]
dw ldrb ; 0101110 LDRB Rt, [Rn, Rm]
dw ldrsh ; 0101111 LDRSH Rt, [Rn, Rm]
; jump table for load/store immediate offset
; the halfword forms (1000X) come first, followed by two unused slots
; and the word/byte forms (011XX)
; NOTE(review): the code indexing this table (h011, h1000) is outside
; this excerpt; confirm the index computation there before reordering.
ht011 dw strhimm ; 10000 STRH Rt, [Rn, #imm5]
dw ldrhimm ; 10001 LDRH Rt, [Rn, #imm5]
dw undefined
dw undefined
dw strimm ; 01100 STR Rt, [Rn, #imm5]
dw ldrimm ; 01101 LDR Rt, [Rn, #imm5]
dw strbimm ; 01110 STRB Rt, [Rn, #imm5]
dw ldrbimm ; 01111 LDRB Rt, [Rn, #imm5]
; jump table for the miscellaneous instructions 1011XXXX
; instructions in parentheses are not available on Cortex-M0
; cores and generate an undefined instruction exception.
; one entry per value of instruction bits 11..8; PUSH and POP each
; occupy two adjacent slots that share one handler
ht1011XXXX:
dw h10110000 ; ADD/SUB SP, SP, #imm7
dw h10110001 ; (CBZ Rn, #imm5)
dw h10110010 ; SXTB/SXTH/UXTB/UXTH
dw h10110011 ; (CBZ Rn, #imm5)
dw h1011010 ; PUSH {...}
dw h1011010 ; PUSH {..., LR}
dw h10110110 ; CPS
dw h10110111 ; escape hatch
dw h10111000 ; undefined
dw h10111001 ; (CBNZ Rn, #imm5)
dw h10111010 ; REV/REV16/REVSH
dw h10111011 ; (CBNZ Rn, #imm5)
dw h1011110 ; POP {...}
dw h1011110 ; POP {..., PC}
dw h10111110 ; BKPT #imm8
dw h10111111 ; (IT), hints
; jump table for (un)signed byte/halfword extend
; and jump table for reverse bytes
; the first four entries serve the B2xx opcodes, the last four the BAxx
; opcodes
htB2BA dw hB200 ; SXTH Rd, Rm
dw hB201 ; SXTB Rd, Rm
dw hB210 ; UXTH Rd, Rm
dw hB211 ; UXTB Rd, Rm
dw hBA00 ; REV Rd, Rm
dw hBA01 ; undefined
dw hBA10 ; REV16 Rd, Rm
dw hBA11 ; REVSH Rd, Rm
section .text
; 000XXAAAAABBBCCC shift immediate
; 00011XYAAABBBCCC add/subtract register/immediate
; Entry: AX = instruction, BX = table offset from step (BH = 0),
; oprA/oprB/oprC as decoded by imm5rr or rrr.
; Hands over to the handler in ht000XX with SI = &regs[Rm],
; DI = &regs[Rd], CX = oprA and zsreg = &regs[Rd].
; An immediate shift amount of 0 means 32 for LSRS/ASRS; for LSLS it
; turns the instruction into MOVS Rd, Rm.
aligncc
h000: mov bl, ah ; BL = 000XXAAA
shr bl, 1 ; BL = 0000XXAA
and bx, 0x0c ; BL = 0000XX00
mov si, [bp+oprB] ; SI = &regs[Rm]
mov di, [bp+oprC] ; DI = &regs[Rd]
mov [bp+zsreg], di ; set SF and ZF according to Rd
mov cx, [bp+oprA] ; CL = imm5 (or Rm for 000110...)
test cx, cx ; if CL == 0 and it's not LSLS, adjust to 32
jz .adj
jmp [ht000XX+bx] ; call instruction specific handler
align 2
.adj: test bl, bl ; is this LSLS?
jz h0000000000
mov cl, 32 ; otherwise adjust CL to 32
jmp [ht000XX+bx]
; 0000000000BBBCCC MOVS Rd, Rm
; Entry (from h000.adj): SI = &regs[Rm], DI = &regs[Rd].
; Copies both register halves; the saved flags word is not touched.
aligncc
h0000000000:
movsw ; Rd = Rm
movsw
ret
; 00011XXAAABBBCCC add/sub register/immediate
; Entry (from h000): AX = instruction, CX = oprA, SI = &Rn, DI = &Rd.
; For the register forms oprA is &regs[Rm] (decoded by rrr).
aligncc
h00011: test ax, 0x0400 ; is this register or immediate?
jnz h000111
; 0001100AAABBBCCC ADDS Rd, Rn, Rm
; 0001101AAABBBCCC SUBS Rd, Rn, Rm
mov bx, cx ; need BX to form an address
mov cx, [bx] ; DX:CX = Rm
mov dx, [bx+hi]
test ax, 0x0200 ; is this ADDS or SUBS? (test also clears CF for ADDS)
jz .adds
not cx ; complement DX:CX and set CF
not dx ; so that Rn + ~Rm + 1 = Rn - Rm with an ARM style carry
stc
.adds: adc cx, [si] ; DX:CX = ADDS ? Rn + Rm : Rn - Rm
adc dx, [si+hi]
mov [di], cx ; Rd = DX:CX
mov [di+hi], dx
pushf ; remember CF and OF
pop word [bp+flags]
ret
; 0001110AAABBBCCC ADDS Rd, Rn, #imm3
; 0001111AAABBBCCC SUBS Rd, Rn, #imm3
; Entry (from h00011): AX = instruction, CX = oprA = instruction bits
; 10..6 (decoded by imm5rr), SI = &Rn, DI = &Rd.
; SUBS is computed as Rn + ~#imm3 + 1 so that CF follows ARM conventions.
aligncc
h000111:add ax, -0x1e00 ; CF = instruction is SUBS
xchg ax, cx ; AX = oprA = 0001 Yiii (Y = SUBS bit, iii = #imm3)
sbb dx, dx ; DX = ADD ? 0 : -1
and al, 0x07 ; AX = #imm3
xor ax, dx ; AX = ADD ? #imm3 : ~#imm3
sahf ; CF = instruction is SUBS (bit 0 of AH, which is 0x00 or 0xff)
adc ax, [si] ; DX:AX = ADDS ? Rn + #imm3 : Rn - #imm3
adc dx, [si+hi]
stosw ; Rd = DX:AX
mov [di], dx
pushf ; remember CF and OF in flags
pop word [bp+flags]
ret
; 001XXBBBCCCCCCCC add/subtract/compare/move immediate
; Entry: AX = instruction, BX = table offset from step (BH = 0),
; oprB = &regs[Rd] (decoded by rimm8).
; Hands over to the handler in ht001XX with AX = #imm8, SI = 0,
; DI = &regs[Rd] and zsreg = &regs[Rd].
aligncc
h001: mov bl, ah ; BL = 001XXBBB
shr bl, 1 ; BL = 0001XXBB
and bx, 0xc ; BL = 0000XX00
xor si, si ; SI = 0
mov di, [bp+oprB] ; DI = &regs[Rd]
xor ah, ah ; AX = #imm8
mov [bp+zsreg], di ; set SF and ZF according to Rd
jmp [ht001XX+bx] ; call instruction specific handler
; 00100BBBCCCCCCCC MOVS Rd, #imm8
; Entry (from h001): AX = #imm8, SI = 0, DI = &Rd
aligncc
h00100: stosw ; Rd = #imm8
mov [di], si ; Rd(hi) = 0 (DI was advanced by stosw)
ret
; 00101BBBCCCCCCCC CMP Rn, #imm8
; Entry (from h001): AX = #imm8, SI = 0, DI = &Rn
; Nothing is written back to a register, so all four flags are computed
; here and zsreg is cleared.
aligncc
h00101: mov dx, [di+hi] ; DX = Rn(hi)
cmp [di], ax ; Rd(lo) - #imm8 (for ZF and borrow)
lahf ; remember ZF according to Rd(lo) - #imm8 in AH
sbb dx, si ; Rd - #imm8 (for CF, SF, OF, and ZF)
cmc ; adjust CF to ARM conventions
pushf ; load flags in DX
pop dx
or ah, ~ZF ; isolate ZF in AH
and dl, ah ; keep ZF only if the low halves compared equal, too
mov [bp+flags], dx ; save flags in flags
mov [bp+zsreg], si ; mark flags as fixed
ret
; 00110BBBCCCCCCCC ADDS Rd, #imm8
; Entry (from h001): AX = #imm8, SI = 0, DI = &Rd, zsreg = &Rd
aligncc
h00110: add [di], ax ; Rd += AX
adc [di+hi], si ; propagate the carry into the high half
pushf ; remember flags
pop word [bp+flags]
ret
; 00111BBBCCCCCCCC SUBS Rd, #imm8
; Entry (from h001): AX = #imm8, SI = 0, DI = &Rd, zsreg = &Rd
aligncc
h00111: sub [di], ax ; Rd -= AX
sbb [di+hi], si ; propagate the borrow into the high half
cmc ; adjust CF to ARM conventions
pushf ; and remember flags
pop word [bp+flags]
ret
; special decode handler for instructions starting with 0100
; 010000... is decoded as imm5 / reg / reg (imm4, really)
; 010001... is decoded in a special manner
; 01001... is decoded as reg / imm8
; Entry state as set up by step: AX = CX = instruction, DX = 0x1c,
; SI = &regs[0], DI = &oprC, instruction on top of the stack.
; The 010001 path stores oprC and oprB only (oprA is left alone) and
; falls through into h0100 instead of going through htXXXX.
aligncc
d0100: test ax, 0x0800 ; is this 01001...?
jnz rimm8 ; if yes, decode as reg / imm8
test ax, 0x0400 ; else, is this 010000...?
jz imm5rr ; if yes, decode as imm5 / reg / reg
; if we get here, we have instruction 0100 01XX CBBB BCCC
; note how the C operand is split in two!
shl ax, 1 ; AX = XXXX XXXC BBBB CCC0
shl ax, 1 ; AX = XXXX XXCB BBBC CC00
and ax, dx ; AX = 0000 0000 000C CC00
shr cx, 1 ; CX = 0010 001X XCBB BBCC
mov dx, cx ; make a copy for masking
shr dx, 1 ; DX = 0001 0001 XXCB BBBC
and dx, 0x20 ; DX = 0000 0000 00C0 0000
or ax, dx ; AX = 0000 0000 00CC CC00
add ax, si ; AX = &regs[C]
stosw ; oprC = &regs[C]
xchg cx, ax
and ax, 0x3c ; AX = 0000 0000 00BB BB00
add ax, si ; AX = &regs[B]
stosw ; oprB = &regs[B]
pop ax ; the current instruction
; fallthrough
; 010000AAAABBBCCC data-processing register
; 010001AACBBBBCCC special data processing
; 01001BBBCCCCCCCC LDR Rd, [PC, #imm8]
; Entry: AX = instruction, oprA/oprB/oprC as decoded.
; Data-processing handlers are entered with SI = &Rm, DI = &Rdn and
; zsreg = &Rdn; special data processing handlers additionally get
; CX = &PC and leave zsreg alone.
h0100: mov di, [bp+oprC] ; DI = &Rdn
mov si, [bp+oprB] ; SI = &Rm
test ax, 0x0800 ; is this LDR Rd, [PC, #imm8]?
jnz .ldr
test ax, 0x0400 ; else, is this special data processing?
jnz .sdp ; otherwise, it's data-processing register
mov [bp+zsreg], di ; set flags according to Rdn
mov bx, [bp+oprA] ; BX = 0000AAAA
shl bx, 1 ; BX = 000AAAA0
jmp [ht010000XXXX+bx]
aligncc
.sdp: mov bl, ah ; BL = 010001AA
and bx, 0x03 ; BX = 000000AA
shl bx, 1 ; BX = 00000AA0
lea cx, rlo(15) ; CX = &PC
jmp [ht010001XX+bx]
; 01001BBBCCCCCCCC LDR Rt, [PC, #imm8]
; here oprC holds #imm8 and oprB holds &Rt (rimm8 layout), hence the
; exchange. Continues in ldr with DI = &Rt and the address in CX:AX.
aligncc
.ldr: xchg di, si ; set up DI = &Rt, SI = #imm8
fixRd ; set flags on Rd if needed
mov ax, rlo(15) ; AX = R15(lo)
shl si, 1 ; SI = #imm8 << 2 + 2
inc si ; (the extra 2 accounts for the stored PC
shl si, 1 ; pointing just behind this instruction)
xor cx, cx
add ax, si ; CX:AX = R15 + #imm8
adc cx, rhi(15)
and al, ~3 ; align to word boundary
jmp ldr ; perform the actual load
fixrest
; 0100000000BBBCCC ANDS Rdn, Rm
; Entry (from h0100): SI = &Rm, DI = &Rdn, zsreg = &Rdn.
; The saved flags word is not touched; N and Z follow from zsreg.
aligncc
h0100000000:
lodsw ; AX = Rm(lo)
and [di], ax ; Rdn(lo) &= Rm(lo)
lodsw ; AX = Rm(hi)
and [di+hi], ax ; Rdn(hi) &= Rm(hi)
ret
; 0100000001BBBCCC EORS Rdn, Rm
; Entry (from h0100): SI = &Rm, DI = &Rdn, zsreg = &Rdn.
; The saved flags word is not touched; N and Z follow from zsreg.
aligncc
h0100000001:
lodsw ; AX = Rm(lo)
xor [di], ax ; Rdn(lo) ^= Rm(lo)
lodsw ; AX = Rm(hi)
xor [di+hi], ax ; Rdn(hi) ^= Rm(hi)
ret
; 0100000010BBBCCC LSLS Rdn, Rm
; Entry (from h0100): SI = &Rm (shift amount), DI = &Rdn.
; Only the low byte of Rm is used. A shift by 0 changes nothing, a
; shift by more than 32 clears Rdn and CF; everything else is handled
; by the immediate form below with SI = DI = &Rdn.
aligncc
h0100000010:
mov cl, [si] ; CL = Rm
test cl, cl ; no shift?
jz h00000.ret
cmp cl, 32 ; shift by more than 32?
ja h00000.hi
mov si, di ; SI = DI = Rdn
; fallthrough
; 00000AAAAABBBCCC LSLS Rd, Rm, #imm5
; Entry: SI = &Rm (source), DI = &Rd, CL = shift amount, 0 < CL <= 32.
; Only the low byte of the saved flags is rewritten (for CF).
; plain align 2 (not aligncc): the register form falls through here
align 2
h00000: cmp cl, 16 ; shift by more than 16?
jbe .lo
; shift by 16 < CL <= 32
sub cl, 16 ; adjust shift amount to 0 < CL <= 16
lodsw ; AX = Rm(lo),
shl ax, cl ; AX = Rm(lo) << imm5 - 16
mov word [di], 0 ; Rd(lo) = 0
mov [di+hi], ax ; Rd(hi) = Rm(lo) << imm5 - 16
lahf ; update CF in flags
mov [bp+flags], ah
.ret: ret
; shift by 0 < CL <= 16
aligncc
.lo: lodsw ; AX = Rm(lo)
mov bx, ax ; keep a copy
shl ax, cl ; AX = Rm(lo) << #imm5
stosw ; Rd(lo) = Rm(lo) << #imm5
mov si, [si] ; SI = Rm(hi)
shl si, cl ; SI = Rm(hi) << #imm5
lahf ; update CF in flags
mov [bp+flags], ah
dec cx ; CL = 16 - CL
xor cx, 15 ; (only the low four bits change, CH does not matter)
shr bx, cl ; BX = Rm(lo) >> 16 - #imm5
lea ax, [bx+si] ; AX = Rm(hi) << #imm5 | Rm(lo) >> 16 - #imm5
stosw ; Rd(hi) = Rm << #imm5 (hi)
ret
; shift by 32 < CL
; also used by LSRS Rdn, Rm, where the result is the same
aligncc
.hi: xor ax, ax ; Rd = 0
stosw
stosw
mov byte [bp+flags], al ; clear CF in flags
ret
; 0100000011BBBCCC LSRS Rdn, Rm
; Entry (from h0100): SI = &Rm (shift amount), DI = &Rdn.
; Only the low byte of Rm is used. A shift by 0 changes nothing, a
; shift by more than 32 clears Rdn and CF; everything else falls
; through into the immediate form with SI = DI = &Rdn.
aligncc
h0100000011:
mov cl, [si] ; CL = Rm
test cl, cl ; no shift?
jz h00001.ret
cmp cl, 32 ; shift by more than 32?
ja h00000.hi ; same as lsl by more than 32
mov si, di ; SI = DI = Rdn
; 00001AAAAABBBCCC LSRS Rd, Rm, #imm5
; Entry: SI = &Rm (source), DI = &Rd, CL = shift amount, 0 < CL <= 32
; (h000 turns #imm5 = 0 into 32).
; Only the low byte of the saved flags is rewritten (for CF).
h00001: cmp cl, 16 ; shift by more than 16?
jbe .lo
; shift by 16 < CL <= 32
sub cl, 16 ; CL = imm5 - 16
mov ax, [si+hi] ; AX = Rm(hi)
shr ax, cl ; AX = Rm(hi) >> imm5 - 16
stosw ; Rd(lo) = Rm(hi) >> imm5 - 16
lahf ; update CF, SF, and ZF in flags
mov [bp+flags], ah
xor ax, ax
stosw ; Rd(hi) = 0
.ret: ret
; shift by 0 < CL <= 16
aligncc
.lo: lodsw ; DX = Rm(lo)
xchg ax, dx
shr dx, cl ; DX = Rm(lo) >> #imm5
lahf ; update CF in flags
mov [bp+flags], ah
lodsw ; AX = Rm(hi)
mov si, ax ; keep a copy
shr si, cl ; SI = Rm(hi) >> #imm5
mov [di+hi], si ; Rd(hi) = Rm(hi) >> #imm5
dec cx ; CL = 16 - CL
xor cx, 15 ; (only the low four bits change, CH does not matter)
shl ax, cl ; AX = Rm(hi) << 16 - #imm5
or ax, dx ; AX = Rm(hi) << 16 - #imm5 | Rm(lo) >> #imm5
stosw ; Rd(lo) = Rm >> #imm5 (lo)
ret
; 0100000100BBBCCC ASRS Rdn, Rm
aligncc
h0100000100:
mov cl, [si] ; CL = Rm
test cl, cl ; shift by 0?
jz h00010.ret
cmp cl, 32 ; shift by 32 or more?
jae h00010.hi
mov si, di ; SI = DI = Rdn
; 00010AAAAABBBCCC ASRS Rd, Rm, #imm5
h00010: cmp cl, 16 ; shift by more than 16?
jbe .lo
; shift by 16 < CL < 32
sub cl, 16 ; CL = imm5 - 16
mov ax, [si+hi] ; AX = Rm(hi)
sar ax, cl ; AX = Rm(hi) >> imm5 - 16
cwd ; DX = Rm(hi) < 0 ? -1 : 0
stosw ; Rd = DX:AX
lahf ; update CF in flags
mov [bp+flags], ah
xchg ax, dx
stosw
.ret: ret
; shift by 0 < CL <= 16
aligncc
.lo: lodsw ; DX = Rm(lo)
xchg ax, dx
shr dx, cl ; DX = Rm(lo) >> #imm5
lahf ; update CF in flags
mov [bp+flags], ah
lodsw ; AX = Rm(hi)
mov si, ax ; keep a copy
sar si, cl ; SI = Rm(hi) >> #imm5
mov [di+hi], si ; Rd(hi) = Rm(hi) >> #imm5
dec cx ; CL = 16 - CL
xor cx, 15
shl ax, cl ; AX = Rm(hi) << 16 - #imm5
or ax, dx ; AX = Rm(hi) << 16 - #imm5 | Rm(lo) >> #imm5
stosw ; Rd(lo) = Rm >> #imm5 (lo)
ret
; shift by 32 <= CL
aligncc
.hi: mov ah, [si+hi+1] ; AH = Rm(hi) (high byte)
cwd ; DX = Rm < 0 ? -1 : 0
mov [bp+flags], dl ; set CF depending on DX
xchg ax, dx ; Rd = DX:DX
stosw
stosw
ret
; 0100000101BBBCCC ADCS Rdn, Rm
; Entry (from h0100): SI = &Rm, DI = &Rdn, zsreg = &Rdn.
; The carry is taken from the saved flags; lodsw does not disturb it.
aligncc
h0100000101:
mov ah, [bp+flags] ; restore CF from flags
sahf
lodsw ; AX = Rm(lo)
adc [di], ax ; Rdn(lo) += Rm(lo) + CF
lodsw ; AX = Rm(hi)
adc [di+hi], ax ; Rdn(hi) += Rm(hi) + CF
pushf ; remember CF and OF in flags
pop word [bp+flags]
ret
; 0100000110BBBCCC SBCS Rdn, Rm
; Entry (from h0100): SI = &Rm, DI = &Rdn, zsreg = &Rdn.
; The saved CF follows ARM conventions (set = no borrow), so it is
; complemented before and after the 8086 subtract-with-borrow.
aligncc
h0100000110:
mov ah, [bp+flags] ; restore CF from flags
sahf
cmc ; adapt CF from ARM conventions
lodsw ; AX = Rm(lo)
sbb [di], ax ; Rdn(lo) -= Rm(lo) + borrow
lodsw ; AX = Rm(hi)
sbb [di+hi], ax ; Rdn(hi) -= Rm(hi) + borrow
cmc ; adjust CF to ARM conventions
pushf ; remember CF and OF in flags
pop word [bp+flags]
ret
; 0100000111BBBCCC RORS Rdn, Rm
aligncc
h0100000111:
mov cl, [si] ; CL = Rm
and cl, 31 ; mask out useless extra rotates
jz .ret ; no rotate?
mov ax, [di] ; DX:AX = Rdn
mov dx, [di+hi]
cmp cl, 16 ; rotating by 16 or more?
jb .lo
sub cl, 16 ; if yes, reduce to rotation to 0 <= cl < 16
xchg ax, dx ; with a pre-rotate by 16
; rotate by 0 <= CL < 16
.lo: mov bx, ax ; BX = Rdn(lo)
shr ax, cl ; AX = Rdn(lo) >> CL
mov si, dx ; SI = Rdn(hi)
shr si, cl ; DX = Rdn(hi) >> CL
xor cx, 15 ; CL = 16 - CL
inc cx
shl dx, cl ; DX = Rdn(hi) << 16 - CL
or ax, dx ; AX = Rdn(lo) >> CL | Rdn(hi) << 16 - CL
stosw ; Rdn(lo) = Rdn ror CL (lo)
shl bx, cl ; BX = Rdn(lo) << 16 - CL
lea ax, [bx+si] ; AX = Rdn(hi) >> CL | Rdn(lo) << 16 - CL
stosw ; Rdn(hi) = Rdn ror CL (hi)
rol ah, 1 ; shift Rdn sign bit into LSB of AH
mov [bp+flags], ah ; and deposit into flags as CF
.ret: ret
; 0100001000BBBCCC TST Rn, Rm
; Entry (from h0100): SI = &Rm, DI = &Rn.
; Nothing is written back to a register, so ZF and SF are computed here
; and merged into the low byte of the saved flags; all other bits of
; that byte (CF among them) are kept. zsreg is cleared.
aligncc
h0100001000:
lodsw ; DX:AX = Rm
mov dx, [si]
test [di], ax ; set ZF according to Rm(lo) & Rn(lo)
lahf
mov al, ah ; AL = Rm(lo) & Rn(lo) flags
test [di+hi], dx ; set ZF and SF according Rm(hi) & Rn(hi)
lahf
or al, ~ZF ; isolate ZF in AL
and ah, al ; AH = flags of the high half, ZF only if both halves are 0
mov al, [bp+flags]
and ax, (ZF|SF)<<8|~(ZF|SF)&0xff
; mask AL to all but ZF and SF,
; AH to just ZF and SF
or al, ah ; merge the two
mov [bp+flags], al ; write them back
mov word [bp+zsreg], 0 ; mark flags as being fixed
ret
; 0100001001BBBCCC RSBS Rd, Rm, #0
; CF = Rn == 0
aligncc
h0100001001:
lodsw ; AX = Rm(lo)
neg ax ; AX = -AX
stosw
sbb ax, ax ; AX = Rm(lo) == 0 ? -1 : 0
sub ax, [si] ; AX = -Rm(hi) - carry
stosw ; Rd = DX:AX
cmc ; adjust CF to ARM conventions
pushf
pop word [bp+flags] ; remember OF and CF in flags
ret
; 0100001010BBBCCC CMP Rn, Rm
; 01000101CBBBBCCC CMP Rn, Rm
; Entry (from h0100): SI = &Rm, DI = &Rn.
; Nothing is written back to a register, so all four flags are computed
; here and zsreg is cleared.
; .flags is shared with CMN; it expects the 8086 flags of the high half
; operation (CF already in ARM convention) and the flags of the low
; half operation in AH.
aligncc
h01000101:
h0100001010:
lodsw ; AX = Rm(lo)
mov dx, [di+hi] ; DX = Rn(hi)
cmp [di], ax ; set flags according to Rn(lo) - Rm(lo)
lahf ; and remember ZF in AH
sbb dx, [si] ; set CF, SF, and OF according to Rn - Rm
cmc ; adapt CF to ARM conventions
.flags: pushf ; load flags into DX
pop dx
or ah, ~ZF ; isolate ZF in AH
and dl, ah ; keep ZF only if the low halves gave zero, too
mov [bp+flags], dx ; save flags in flags
mov word [bp+zsreg], 0 ; mark flags as fixed
ret
; 0100001011BBBCCC CMN Rn, Rm
; Entry (from h0100): SI = &Rm, DI = &Rn.
; Joins CMP behind its cmc: the carry of an addition already follows
; ARM conventions.
aligncc
h0100001011:
lodsw ; AX = Rm(lo)
add ax, [di] ; set flags according to Rn(lo) + Rm(lo)
lahf ; and remember ZF in AH
mov dx, [si] ; DX = Rm(hi)
adc dx, [di+hi] ; set CF, SF, and OF according to Rn + Rm
jmp h0100001010.flags ; rest is the same as with CMP Rn, Rm
; 0100001100BBBCCC ORRS Rd, Rm
; Entry (from h0100): SI = &Rm, DI = &Rdn, zsreg = &Rdn.
; The saved flags word is not touched; N and Z follow from zsreg.
aligncc
h0100001100:
lodsw ; AX = Rm(lo)
or [di], ax ; Rdn(lo) |= Rm(lo)
lodsw ; AX = Rm(hi)
or [di+hi], ax ; Rdn(hi) |= Rm(hi)
ret
; 0100001101BBBCCC MULS Rd, Rm
aligncc
h0100001101:
lodsw ; AX = Rm(lo)
mov bx, ax ; remember a copy
mul word [di] ; DX:AX = Rm(lo) * Rd(lo)
mov cx, dx ; CX = Rm(lo) * Rd(lo) (hi)
xchg bx, ax ; BX = Rm * Rd (lo), AX = Rm(lo)
mul word [di+hi] ; AX = Rm(lo) * Rd(hi) (lo), DX = junk
add cx, ax ; CX = Rm(lo)*Rd(lo) (hi) + Rm(lo)*Rd(hi) (lo)