| Here's my entry. It's really the same algorithm as in .1, but the loops
are unrolled and it uses a bitvector to remember which cards were
present. It is also reusable (.1 is not, because it does not
reinitialise the TEMP byte vector)
Andrew
.title sort13
.psect sort13,rd,exe,nowrt,page
; REQUIRE FILES
$ssdef
; MACRO DEFINITIONS
; macro to generate an instruction for scanning the input array
.macro scan_in ?l1
bbss (r0)+,(sp),l1
l1:
.endm
; macro to generate instructions for scanning the output array
.macro scan_out x,?l2
.iif lt, x-32, bbc #x,r2,l2
.iif ge, x-32, bbc #x-32,r3,l2
movl #x,(r1)+
l2:
.endm
; DATA DEFINITIONS
; data inserted to make the input scan instructions be aligned
.blkb 1
;++
;
; FUNCTIONAL DESCRIPTION:
;
; Sorts an array of 13 integers, each integer is in the
; range 1 to 52, with no duplicates
; This routine is reusable and reentrant
;
; FORMAL PARAMETERS:
;
; sortarray - an integer array, read and written
; the address of the first longword in the array
; is passed
;
; IMPLICIT INPUTS:
;
; none
;
; IMPLICIT OUTPUTS:
;
; none
;
; ROUTINE VALUE:
;
; none
;
; COMPLETION CODES:
;
; ss$_normal
;
; SIDE EFFECTS:
;
; none
;
;--
.entry sort13, ^m<r2,r3>
movl 4(ap), r0 ; address of array
movl r0, r1 ; copy for second loop
; initialise bitvector (pity we cant do this in registers)
clrq -(sp)
; scan the input array
; note that these instructions are aligned on longword boundaries
; in case it makes it go faster !
; this is not done for the output loop, as each set of instructions
; is 7 bytes long, putting an extra NOOP in would probably slow it
; down more in the cases the branch is not taken than would be gained
; if the branch is taken
.repeat 13
scan_in
.endr
; get bitvector into registers
movq (sp)+,r2
; scan the bit vector, creating the output array
bitno=1
.repeat 52
scan_out bitno
bitno=bitno+1
.endr
; set up status code
movl #ss$_normal,r0
ret
.end
|
| re .-1
Actually you could reduce the number of taken branches when scanning
the bitvector for a further speedup by threading together some code
like this, where r2,r3 are hold the bitvector. You basically fall thru
till you hit another card, when you branch to store it and fall thru
some more...
100$:
bbs #0,r2,101$
bbs #1,r2,102$
bbs #2,r2,103$
...
bbs #52-31,r3,151$
bbs #52-32,r3,152$
ret
101$: movzbl #1,(r1)+
bbs #1,r2,102$
bbs #2,r2,103$
...
bbs #52-31,r3,151$
bbs #52-32,r3,152$
ret
102$: movzbl #2,(r1)+
bbs #2,r2,103$
bbs #3,r2,104$
...
bbs #52-31,r3,151$
bbs #52-32,r3,152$
ret
151$: movzbl #51,(r1)+
bbs #52-32,r3,152$
ret
...
152$: movzbl #52,(r1)+
ret
- Jim
|
| .title sort13
deck_size= 52
hand_size= 13
.psect data, long
c.1: .long 1
c.2: .long 2
c.3: .long 3
c.4: .long 4
c.5: .long 5
c.6: .long 6
c.7: .long 7
c.8: .long 8
c.9: .long 9
c.10: .long 10
c.11: .long 11
c.12: .long 12
c.13: .long 13
c.14: .long 14
c.15: .long 15
c.16: .long 16
c.17: .long 17
c.18: .long 18
c.19: .long 19
c.20: .long 20
c.21: .long 21
c.22: .long 22
c.23: .long 23
c.24: .long 24
c.25: .long 25
c.26: .long 26
c.27: .long 27
c.28: .long 28
c.29: .long 29
c.30: .long 30
c.31: .long 31
c.32: .long 32
c.33: .long 33
c.34: .long 34
c.35: .long 35
c.36: .long 36
c.37: .long 37
c.38: .long 38
c.39: .long 39
c.40: .long 40
c.41: .long 41
c.42: .long 42
c.43: .long 43
c.44: .long 44
c.45: .long 45
c.46: .long 46
c.47: .long 47
c.48: .long 48
c.49: .long 49
c.50: .long 50
c.51: .long 51
c.52: .long 52
temp: .blkw deck_size+1
.psect code
.entry sort13, ^m<r2, r3, r4, r5, r6>
movc5 #0, (r0), #0, #<deck_size+1>*2, temp
movl 4(ap), r0
.repeat hand_size
movl (r0)+, r2
movw #4, temp[r2]
.endr
movaw temp+2, r6
movl 4(ap), r3
movc (r6)+, c.1, (r3)+
movc (r6)+, c.2, (r3)+
movc (r6)+, c.3, (r3)+
movc (r6)+, c.4, (r3)+
movc (r6)+, c.5, (r3)+
movc (r6)+, c.6, (r3)+
movc (r6)+, c.7, (r3)+
movc (r6)+, c.8, (r3)+
movc (r6)+, c.9, (r3)+
movc (r6)+, c.10, (r3)+
movc (r6)+, c.11, (r3)+
movc (r6)+, c.12, (r3)+
movc (r6)+, c.13, (r3)+
movc (r6)+, c.14, (r3)+
movc (r6)+, c.15, (r3)+
movc (r6)+, c.16, (r3)+
movc (r6)+, c.17, (r3)+
movc (r6)+, c.18, (r3)+
movc (r6)+, c.19, (r3)+
movc (r6)+, c.20, (r3)+
movc (r6)+, c.21, (r3)+
movc (r6)+, c.22, (r3)+
movc (r6)+, c.23, (r3)+
movc (r6)+, c.24, (r3)+
movc (r6)+, c.25, (r3)+
movc (r6)+, c.26, (r3)+
movc (r6)+, c.27, (r3)+
movc (r6)+, c.28, (r3)+
movc (r6)+, c.29, (r3)+
movc (r6)+, c.30, (r3)+
movc (r6)+, c.31, (r3)+
movc (r6)+, c.32, (r3)+
movc (r6)+, c.33, (r3)+
movc (r6)+, c.34, (r3)+
movc (r6)+, c.35, (r3)+
movc (r6)+, c.36, (r3)+
movc (r6)+, c.37, (r3)+
movc (r6)+, c.38, (r3)+
movc (r6)+, c.39, (r3)+
movc (r6)+, c.40, (r3)+
movc (r6)+, c.41, (r3)+
movc (r6)+, c.42, (r3)+
movc (r6)+, c.43, (r3)+
movc (r6)+, c.44, (r3)+
movc (r6)+, c.45, (r3)+
movc (r6)+, c.46, (r3)+
movc (r6)+, c.47, (r3)+
movc (r6)+, c.48, (r3)+
movc (r6)+, c.49, (r3)+
movc (r6)+, c.50, (r3)+
movc (r6)+, c.51, (r3)+
movc (r6)+, c.52, (r3)+
ret
.end
|
| .title sort13
deck_size= 52
hand_size= 13
sort: .blkl deck_size+1
s.siz= .- sort
temp: .blkl hand_size+1
.entry sort13, ^m<r2, r3, r4, r5>
movc3 #hand_size*4, @4(ap), temp+4
movc5 #0, (r0), #0, #s.siz, sort
movl 4(ap), r0
;
; movl #1, r1
;10$: movl (r0)+, r2
; movl r1, sort[r2]
; aobleq #13, r1, 10$
;
movl (r0)+, r2
movl #1, sort[r2]
movl (r0)+, r2
movl #2, sort[r2]
movl (r0)+, r2
movl #3, sort[r2]
movl (r0)+, r2
movl #4, sort[r2]
movl (r0)+, r2
movl #5, sort[r2]
movl (r0)+, r2
movl #6, sort[r2]
movl (r0)+, r2
movl #7, sort[r2]
movl (r0)+, r2
movl #8, sort[r2]
movl (r0)+, r2
movl #9, sort[r2]
movl (r0)+, r2
movl #10, sort[r2]
movl (r0)+, r2
movl #11, sort[r2]
movl (r0)+, r2
movl #12, sort[r2]
movl (r0)+, r2
movl #13, sort[r2]
movl 4(ap), r0
moval sort+4, r2
movl #hand_size, r1
20$: movl (r2)+, r3
beql 20$
movl temp[r3], (r0)+
sobgtr r1, 20$
ret
.end
|