; $Id$
; mfmpasm.a assembler routines for Metafont and Metapost
; By Jakob Stoklund Olesen

; condition code HS = unsigned Higher or Same =  CS
; condition code LO = unsigned Lower = CC

; We need some macros for the division routines
; CmpBr shift, label: if a1 < a2<<shift B label
	MACRO	CmpBr, shift, label
	cmp	a1, a2, lsl #shift
	bcc	label
	ENDM

; HDiv shift: perform high-bit division. For bits with value>=1
; a1=remainder, a2=divisor, a3=result
; pre-condition: a1 < a2<<(shift+1)
; post-condition: a1 < a2<<shift
	MACRO	HDiv, shift
	cmp	a1, a2, lsl #shift
	adc	a3, a3, a3		; shift a3, add carry
	subcs	a1, a1, a2, lsl #shift	; HS = Carry Set
	ENDM

; LDiv: perform low-bit division. For bits with value < 1
; a1=shifted remainder to previous bits scale, a2=divisor, a3=result
; pre-condition: a1 < a2
; post-condition: a1 shifted up current bit's scale
	MACRO	LDiv
	mov	a1, a1, lsl #1		; shift remainder to our scale
	cmp	a1, a2
	adc	a3, a3, a3		; shift a3, add carry
	subcs	a1, a1, a2		; HS = Carry Set
	ENDM

	AREA	C$$Code, CODE, READONLY

; overflow: common code.
; Set aritherror and return el_gordo with same sign as ip
overflow_stacked:
	ldmfd	sp!, {lr}
overflow:
	ldr	a1, ae_addr	; get the address of aritherror
	mov	a2, #1
	str	a2, [a1]	; aritherror:=true
	mov	a1, #&80000000	; smallest negative number
	tst	ip, a1		; negative?
	subeq	a1, a1, #1	; =  el_gordo = &7fffffff
	addne	a1, a1, #1	; = -el_gordo = &80000001
	movs	pc, lr
ae_addr:
	IMPORT	aritherror
	dcd	aritherror

; zmakefraction
; calculate floor(a1 * 2^28 / a2 + 1/2) when a1, a2 positive.
; if overflow set aritherror true and return 2^31-1

	EXPORT	zmakefraction
zmakefraction:
	movs	ip, a1		; keep sign in ip
	moveqs	pc, lr		; 0/? = 0, no questions.
	rsbmi	a1, a1, #0	; make a1 positive
	cmp	a2, #0
	beq	overflow	; division by zero
	eor	ip, ip, a2	; ip has the sign of the result
	rsblt	a2, a2, #0	; force positive
; Now for the division a1<<28/a2
	mov	a3, #0		; a3 holds the result
; if a1 < a2 we can go directly to fractional part calculation
	cmp	a1, a2
	bcc	mf_b27
; otherwise try to shift up a2, but no more than twice
	CmpBr	1, mf_b28
	CmpBr	2, mf_b29
	cmp	a1, a2, lsl #3
	bcs	overflow	; result must be less than 2^31

mf_b30:	HDiv	2
mf_b29:	HDiv	1
mf_b28:	HDiv	0
; a1 unshifted
mf_b27:	LDiv
; a1 shifted 1
mf_b26:	LDiv
mf_b25:	LDiv
mf_b24:	LDiv
mf_b23:	LDiv
mf_b22:	LDiv
mf_b21:	LDiv
mf_b20:	LDiv
mf_b19:	LDiv
mf_b18:	LDiv
mf_b17:	LDiv
mf_b16:	LDiv
mf_b15:	LDiv
mf_b14:	LDiv
mf_b13:	LDiv
mf_b12:	LDiv
mf_b11:	LDiv
mf_b10:	LDiv
mf_b9:	LDiv
mf_b8:	LDiv
mf_b7:	LDiv
mf_b6:	LDiv
mf_b5:	LDiv
mf_b4:	LDiv
mf_b3:	LDiv
mf_b2:	LDiv
mf_b1:	LDiv
mf_b0:	LDiv

mf_roundoff:	; add one if remainder >= 1/2LSB
	cmp	a2, a1, lsl #1
	addls	a1, a3, #1	; round up
	movhi	a1, a3		; round down
	tst	ip, #1<<31	; ip has the result sign
	rsbne	a1, a1, #0	; restore result sign
	movs	pc, lr



; zmakescaled --- similar to zmakefraction but not so much used.
; calculate floor(a1 * 2^16 / a2 + 1/2) when a1, a2 positive.
; if overflow set aritherror true and return 2^31-1

	EXPORT	zmakescaled
zmakescaled:
	movs	ip, a1		; keep sign in ip
	moveqs	pc, lr		; 0/? = 0, no questions.
	rsbmi	a1, a1, #0	; make a1 positive
	cmp	a2, #0
	beq	overflow	; division by zero
	eor	ip, ip, a2	; ip has the sign of the result
	rsblt	a2, a2, #0	; force positive
; Now for the division a1<<16/a2
	mov	a3, #0		; a3 holds the result
; if a1 < a2 we can go directly to fractional part calculation
	cmp	a1, a2
	bcc	mf_b15	; NB zmakeFRACTION code!
; otherwise try to shift up a2, but no more than 15 times
	CmpBr	 1, ms_b16
	CmpBr	 2, ms_b17
	CmpBr	 3, ms_b18
	CmpBr	 4, ms_b19
	CmpBr	 5, ms_b20
	CmpBr	 6, ms_b21
	CmpBr	 7, ms_b22
	CmpBr	 8, ms_b23
	CmpBr	 9, ms_b24
	CmpBr	10, ms_b25
	CmpBr	11, ms_b26
	CmpBr	12, ms_b27
	CmpBr	13, ms_b28
	CmpBr	14, ms_b29
	cmp	a1, a2, lsl #15
	bcs	overflow	; result must be less than 2^31

ms_b30:	HDiv	14
ms_b29:	HDiv	13
ms_b28:	HDiv	12
ms_b27:	HDiv	11
ms_b26:	HDiv	10
ms_b25:	HDiv	9
ms_b24:	HDiv	8
ms_b23:	HDiv	7
ms_b22:	HDiv	6
ms_b21:	HDiv	5
ms_b20:	HDiv	4
ms_b19:	HDiv	3
ms_b18:	HDiv	2
ms_b17:	HDiv	1
ms_b16:	HDiv	0
	b	mf_b15	; let zmakefraction handle it from here.



; ztakefraction
; calculate floor(a1 * a2 / 2^28 + 1/2) when a1, a2 positive.
; if overflow set aritherror true and return 2^31-1

	EXPORT	ztakefraction
ztakefraction:
	movs	ip, a1		; keep sign in ip
	moveqs	pc, lr		; 0*? = 0, no questions.
	rsbmi	a1, a1, #0	; make a1 positive
	cmp	a2, #0
	eor	ip, ip, a2	; ip has the sign of the result
	rsblt	a2, a2, #0	; force positive

	stmfd	sp!, {lr}	; stack lr
; split up the operands in two parts
	movs	a3, a1, lsr #16		; high1, set flags
	mov	a4, a2, lsr #16		; high2
	bic	a1, a1, a3, lsl #16	; low1
	bic	a2, a2, a4, lsl #16	; low2
; and do a long multiply
	mul	lr, a1, a2		; low part
	mul	a2, a3, a2		; middle 1
	mul	a1, a4, a1		; middle 2
	mulne	a3, a4, a3		; high part
	adds	a1, a1, a2		; a1 = mid1+mid2
	addcs	a3, a3, #&10000		; carry to high part
	adds	a2, lr, a1, lsl #16	; a2 = low result
	adc	a3, a3, a1, lsr #16	; a3 = high result
; that was a long multiply
	tst	a3, #&f8000000		; any illegal bits?
	bne	overflow_stacked
	movs	a1, a2, lsr #28		; shift bit to C
	adcs	a1, a1, a3, lsl #4	; 32-28
	bmi	overflow_stacked	; negative => overflow
	tst	ip, #1<<31
	rsbne	a1, a1, #0		; restore sign
	ldmfd	sp!, {pc}^



; ztakescaled
; calculate floor(a1 * a2 / 2^16 + 1/2) when a1, a2 positive.
; if overflow set aritherror true and return 2^31-1

	EXPORT	ztakescaled
ztakescaled:
	movs	ip, a1		; keep sign in ip
	moveqs	pc, lr		; 0*? = 0, no questions.
	rsbmi	a1, a1, #0	; make a1 positive
	cmp	a2, #0
	eor	ip, ip, a2	; ip has the sign of the result
	rsblt	a2, a2, #0	; force positive

	stmfd	sp!, {lr}	; stack lr
; split up the operands in two parts
	movs	a3, a1, lsr #16		; high1 + flags
	mov	a4, a2, lsr #16		; high2
	bic	a1, a1, a3, lsl #16	; low1
	bic	a2, a2, a4, lsl #16	; low2
; and do a long multiply
	mul	lr, a1, a2		; low part
	mul	a2, a3, a2		; middle 1
	mul	a1, a4, a1		; middle 2
	mulne	a3, a4, a3		; high part
	adds	a1, a1, a2		; a1 = mid1+mid2
	addcs	a3, a3, #&10000		; carry to high part
	adds	a2, lr, a1, lsl #16	; a2 = low result
	adc	a3, a3, a1, lsr #16	; a3 = high result
; that was a long multiply
	movs	a1, a3, lsr #15		; any illegal bits?
	bne	overflow_stacked
	movs	a1, a2, lsr #16		; shift bit to C
	adcs	a1, a1, a3, lsl #16	; 32-16
	bmi	overflow_stacked	; negative => overflow
	tst	ip, #1<<31
	rsbne	a1, a1, #0		; restore sign
	ldmfd	sp!, {pc}^


