Subject: No 'unsigned long' math routines in libc.a (#134)
Index:	lib/pdp/crt/{ultof,ulsh,uldiv,ulrem}.s 2.11BSD

Description:
	There is a total lack of support for "unsigned long" in both
	the C compiler and the runtime library.

Repeat-By:
	Observation? ;-)

Fix:
	The enclosed update consists of two parts.  The first is
	a patch to the /usr/src/lib/libc/pdp/crt/Makefile.  The second
	part is a set of routines which implement the 'shift', 'divide',
	'remainder' and 'convert to floating point' operations.

	These routines are being posted in advance of the changes to the
	compiler itself.  Yes, i have 'u_long' implemented in the C compiler
	but want to do some more testing (at present the kernel and
	networking work when compiled with the new compiler).  You will need 
	to install these routines before the changes to the compiler are made.

	You can use these routines prior to the compiler changes because 
	there is both a C interface as well as an internal compiler
	interface to 'ultof', 'ulsh', 'uldiv', 'ulrem'.   Example:

		long	a, b, c;
		float	d, ultof();

		a = uldiv(b, c);	/* a = b/c */
		d = ultof(b);

	The ulrem and uldiv routines are slow and bulky for divisors
	greater or equal to 2^15.  Feel free to improve them - i'd like
	a copy ;-)

	NOTE:  It will be necessary to copy (or better yet make symlinks)
               these routines to the sys/pdp directory for kernel use.
	       Instructions for this will come later.

	After applying the patch and unshar'ing the included file you need
	to:

		cd /usr/src/lib/libc/pdp/crt
		make
		ar rv /lib/libc.a *.o
		cd profiled
		ar rv /usr/lib/libc_p.a *.o
		cd ..
		make clean
		ranlib /lib/libc.a /usr/lib/libc_p.a
===============================cut here===========================
*** /usr/src/lib/libc/pdp/crt/Makefile.old	Wed Feb  4 21:19:28 1987
--- /usr/src/lib/libc/pdp/crt/Makefile	Sun Jun  6 20:49:46 1993
***************
*** 3,13 ****
  # All rights reserved.  The Berkeley software License Agreement
  # specifies the terms and conditions for redistribution.
  #
! #	@(#)Makefile	5.6 (Berkeley) 1/28/87
  #
  #
! SRCS=	aldiv.s almul.s alrem.s csv.s ldiv.s lmul.s lrem.s udiv.s
! OBJS=	aldiv.o almul.o alrem.o csv.o ldiv.o lmul.o lrem.o udiv.o
  CFLAGS=	-O ${DEFS}
  TAGSFILE=tags
  
--- 3,16 ----
  # All rights reserved.  The Berkeley software License Agreement
  # specifies the terms and conditions for redistribution.
  #
! #	@(#)Makefile	5.7 (2.11BSD GTE) 6/6/93
  #
  #
! SRCS=	aldiv.s almul.s alrem.s csv.s ldiv.s lmul.s lrem.s udiv.s \
! 	uldiv.s ulrem.s ulsh.s ultof.s
! OBJS=	aldiv.o almul.o alrem.o csv.o ldiv.o lmul.o lrem.o udiv.o \
! 	uldiv.o ulrem.o ulsh.o ultof.o
! 
  CFLAGS=	-O ${DEFS}
  TAGSFILE=tags
  
***************
*** 69,74 ****
--- 72,81 ----
  lmul.o: lmul.s ./DEFS.h
  lrem.o: lrem.s ./DEFS.h
  udiv.o: udiv.s ./DEFS.h
+ uldiv.o: uldiv.s ./DEFS.h
+ ulrem.o: ulrem.s ./DEFS.h
+ ulsh.o: ulsh.s ./DEFS.h
+ ultof.o: ultof.s ./DEFS.h
  # DEPENDENCIES MUST END AT END OF FILE
  # IF YOU PUT STUFF HERE IT WILL GO AWAY
  # see make depend above

============================cut here====================================
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	/usr/src/lib/libc/pdp/crt/ultof.s
#	/usr/src/lib/libc/pdp/crt/ulsh.s
#	/usr/src/lib/libc/pdp/crt/ulrem.s
#	/usr/src/lib/libc/pdp/crt/uldiv.s
# This archive created: Fri Jun 11 21:46:01 1993
export PATH; PATH=/bin:/usr/bin:$PATH
if test -f '/usr/src/lib/libc/pdp/crt/ultof.s'
then
	echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/ultof.s'"
else
sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/ultof.s'
X/*
X * Program: ultof.s
X * Copyright 1993, GTE Government Systems
X * Author:  Steven M. Schultz
X *
X *  Version	Date		Modification
X *	0.0	02Feb91		1. Initial inspiration struck.
X *	1.0	05Jun93		2. Released into the Public Domain.
X*/
X
X#include "DEFS.h"
X
X/*
X * All routines have both a C interface and an assembly interface.  Normally
X * the two are the same.  In the case of 'ulsh' the compiler has placed one
X * of the operands in r0 and r1 so the assembly interface differs from the
X * C interface.
X*/
X
X#define	twogig	050000
X
X#if	!defined(KERNEL)
X/*
X * float ultof(lhs)
X *	u_long	lhs;
X *
X * unsigned 32-bit long to floating conversion.  Calls to ultof are
X * generated automatically by the C compiler.  This routine is purposefully
X * not defined for the kernel since the kernel shouldn't (can't) do
X * FP arithmetic.
X *
X * The bare 'ultof' label and ENTRY(ultof) sit at the same address, so the
X * same code serves both names.  The conversion itself is done by l2f,
X * which leaves the result in fr0 and the FP unit in 'setl' mode; the
X * 'seti' below is the mode reset that l2f requires of its caller.
X */
X
X	.globl ultof
Xultof:
XENTRY(ultof)
X	jsr	pc,l2f		/ 2(sp) -> fr0
X	seti			/ undo the setl done inside l2f
X	rts	pc
X
X/*
X * Shared conversion helpers (l2f here, l6f below).  They live in one
X * place to save space, at the price of a jsr+rts per use.  Each one
X * issues 'setl' and leaves it in force: the caller must do the 'seti'.
X * Not for the kernel until the kernel can do FP arithmetic.
X *
X * l2f: convert the unsigned long that the CALLER of our caller passed
X * at its 2(sp) - seen here at 4(sp) because of the extra return
X * address - into fr0.  A value with bit 31 set has that bit cleared
X * in place on the stack, is converted, and then has 2^31 added back.
X*/
X
XASENTRY(l2f)
X	setl
X	tst	4(sp)		/ bit 31 of the operand set?
X	bmi	1f		/ yes - needs the 2^31 fixup
X	movif	4(sp),fr0	/ no - a plain signed conversion is right
X	rts	pc
X1:
X	bic	$100000,4(sp)	/ strip bit 31 (modifies the stacked copy)
X	movif	4(sp),fr0	/ convert the low 31 bits
X	addf	$twogig,fr0	/   and put 2^31 back
X	rts	pc
X
X/*
X * l6f: same job as l2f, but for the second unsigned long argument
X * (6(sp) in the routine that calls us, 8.(sp) here) and with the
X * result delivered in fr3.  Leaves 'setl' in force for the caller
X * to undo with 'seti'.
X */
XASENTRY(l6f)
X	setl
X	tst	8.(sp)		/ bit 31 of the operand set?
X	bmi	1f		/ yes - needs the 2^31 fixup
X	movif	8.(sp),fr3	/ no - a plain signed conversion is right
X	rts	pc
X1:
X	bic	$100000,8.(sp)	/ strip bit 31 (modifies the stacked copy)
X	movif	8.(sp),fr3	/ convert the low 31 bits
X	addf	$twogig,fr3	/   and put 2^31 back
X	rts	pc
X#endif KERNEL
SHAR_EOF
fi
if test -f '/usr/src/lib/libc/pdp/crt/ulsh.s'
then
	echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/ulsh.s'"
else
sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/ulsh.s'
X/*
X * Program: ulsh.s
X * Copyright 1993, GTE Government Systems
X * Author:  Steven M. Schultz
X *
X *  Version	Date		Modification
X *	0.0	02Feb91		1. Initial inspiration struck.
X *	1.0	05Jun93		2. Released into the Public Domain.
X*/
X
X#include "DEFS.h"
X
X/*
X * All routines have both a C interface and an assembly interface.  Normally
X * the two are the same.  In the case of 'ulsh' the compiler has placed one
X * of the operands in r0 and r1 so the assembly interface differs from the
X * C interface.
X*/
X
X/*
X * u_long ulsh(lhs, count)
X *	u_long	lhs;
X *	short	count;
X *
X * Unsigned 32-bit "<<" and ">>".  A positive count shifts left, a
X * negative count shifts right with zero fill.  Calls to ulsh are
X * generated automatically by the C compiler.
X *
X * This first entry is the compiler (assembly) interface: the long is
X * already in r0 (high word) and r1 (low word) and only the count is
X * on the stack.  The result is left in r0/r1.  The stacked count is
X * modified when it is negative.
X */
X
XASENTRY(ulsh)
X	tst	2(sp)		/ which direction?  (also clears carry)
X	bmi	2f		/ negative - right shift needs a fixup
X1:
X	ashc	2(sp),r0	/ shift by whatever count remains
X	rts	pc
X2:
X	ror	r0		/ first right shift done by hand so that
X	ror	r1		/   a zero, not the sign, enters bit 31
X	inc	2(sp)		/ one shift fewer left to do
X	br	1b
X
X/*
X * C interface to ulsh: the long is at 2(sp)/4(sp) (high/low word) and
X * the count at 6(sp).  Result in r0 (high) and r1 (low).  The stacked
X * count is modified when it is negative.
X */
XENTRY(ulsh)
X	mov	2(sp),r0	/ r0 = hi(lhs)
X	mov	4(sp),r1	/ r1 = lo(lhs)
X	tst	6(sp)		/ which direction?  (also clears carry)
X	bmi	2f		/ negative - right shift needs a fixup
X1:
X	ashc	6(sp),r0	/ shift by whatever count remains
X	rts	pc
X2:
X	ror	r0		/ first right shift done by hand so that
X	ror	r1		/   a zero, not the sign, enters bit 31
X	inc	6(sp)		/ one shift fewer left to do
X	br	1b
X
X/*
X * u_long ualsh(lhs, count)
X *	u_long	*lhs;
X *	short	count;
X *
X * Unsigned 32-bit "<<=" and ">>=".  The long that lhs points at is
X * shifted (left for a positive count, right with zero fill for a
X * negative one), written back through the pointer and also returned
X * in r0 (high word) and r1 (low word).  Calls to ualsh are generated
X * automatically by the C compiler.
X */
X	.globl	ualsh
Xualsh:
XENTRY(ualsh)
X	mov	r2,-(sp)	/ r2 is used as the pointer; preserve it
X	mov	4(sp),r2	/ r2 = lhs (address of the long)
X	mov	(r2),r0		/ r0 = high word
X	mov	2(r2),r1	/ r1 = low word
X	tst	6(sp)		/ which direction?  (also clears carry)
X	bpl	1f		/ left (or zero) - ashc can do it all
X	ror	r0		/ right: first shift by hand so that a
X	ror	r1		/   zero, not the sign, enters bit 31
X	inc	6(sp)		/ one shift fewer left to do
X1:
X	ashc	6(sp),r0	/ shift by whatever count remains
X	mov	r0,(r2)		/ store the result back through lhs
X	mov	r1,2(r2)
X	mov	(sp)+,r2
X	rts	pc
SHAR_EOF
fi
if test -f '/usr/src/lib/libc/pdp/crt/ulrem.s'
then
	echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/ulrem.s'"
else
sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/ulrem.s'
X/*
X * Program: ulrem.s
X * Copyright 1993, GTE Government Systems
X * Author:  Steven M. Schultz
X *
X *  Version	Date		Modification
X *	0.0	02Feb91		1. Initial inspiration struck.
X *	1.0	05Jun93		2. Released into the Public Domain.
X*/
X
X#include "DEFS.h"
X
X/*
X * All routines have both a C interface and an assembly interface.  Normally
X * the two are the same.  In the case of 'ulsh' the compiler has placed one
X * of the operands in r0 and r1 so the assembly interface differs from the
X * C interface.
X*/
X
X#define	one	040200
X
X/*
X * u_long ulrem(lhs, rhs)
X *	u_long	lhs, rhs;
X *
X * 32-bit "%" routine.  Calls to ulrem are generated automatically by the C
X * compiler.
X */
X
X#if !defined(KERNEL)
X/*
X * ulrem for applications (uses floating point).
X *
X * lhs is at 2(sp)/4(sp) and rhs at 6(sp)/8.(sp) (high/low word).  The
X * remainder is returned in r0 (high word) and r1 (low word).  A zero
X * divisor is not an error here: lhs is returned unchanged.  l2f and
X * l6f switch the FP unit to 'setl' and may clear bit 31 of the stacked
X * arguments; 'seti' is restored before returning.  fr0-fr3 are used.
X *
X * The remainder can be as large as 2^32-1, which a direct 'movfi' to a
X * signed long cannot represent.  Values of 2^31 and up are therefore
X * converted as (value - 2^31) and bit 31 is set afterwards by hand.
X */
X
X#define	twogig	050000
X
X	.globl	ulrem
X	.globl	l2f, l6f
X
Xulrem:
XENTRY(ulrem)
X	jsr	pc,l2f		/ 2(sp) -> fr0
X	movf	fr0,fr2		/ keep a copy of lhs in fr2
X	jsr	pc,l6f		/ 6(sp) -> fr3
X	tstf	fr3		/ check for division by zero
X	cfcc			/   so that no FP trap happens during
X	beq	1f		/   integer arithmetic (result is lhs)
X	divf	fr3,fr0		/ fr0 = lhs/rhs
X	modf	$one,fr0	/ fr1 = integer part, fr0 = fraction
X	mulf	fr3,fr1		/ fr1 = integer(lhs/rhs) * rhs
X	subf	fr1,fr2		/ fr2 = lhs - (integer(lhs/rhs) * rhs)
X1:
X	cmpf	$twogig,fr2	/ forms 2^31 - result
X	cfcc
X	bgt	2f		/ result < 2^31: fits in a signed long
X	subf	$twogig,fr2	/ else convert (result - 2^31)
X	movfi	fr2,-(sp)
X	bis	$100000,(sp)	/   and restore bit 31 in the high word
X	br	3f
X2:
X	movfi	fr2,-(sp)	/ (result)
X3:
X	mov	(sp)+,r0
X	mov	(sp)+,r1
X	seti
X	rts	pc
X#else
X/*
X * ulrem for the kernel (uses only fixed point - no FP)
X *
X * After the three register saves the arguments are at:
X *	 8.(sp) hi(lhs)   10.(sp) lo(lhs)   12.(sp) hi(rhs)   14.(sp) lo(rhs)
X * The remainder is returned in r0 (high word) and r1 (low word); r2-r4
X * are saved and restored.  A zero divisor returns lhs unchanged.
X *
X * Method: count the leading zero bits ("scale") of lhs and of rhs,
X * shift rhs left until its top bit lines up with that of lhs, then
X * compare/subtract while shifting rhs (and a one-bit "quotient adder"
X * that tracks its position) back to the right.  When the adder has
X * been shifted out the value left in r0:r1 is the remainder.
X *
X * If lhs has MORE leading zeros than rhs then lhs < rhs and lhs is
X * itself the remainder.  That case must not enter the loop: the scale
X * difference is negative, rhs would be shifted right instead of left
X * and could then be subtracted from lhs (5 % 8 came out as 1).
X*/
X	.globl ulrem
Xulrem:
XENTRY(ulrem)
X	mov	r2,-(sp)	/ faster than csv/cret ...
X	mov	r3,-(sp)
X	mov	r4,-(sp)
X	mov	8.(sp),r0	/ r0 = hi(lhs)
X	mov	10.(sp),r1	/ r1 = lo(lhs)
X	mov	12.(sp),r2	/ r2 = hi(rhs)
X	mov	14.(sp),r3	/ r3 = lo(rhs)
X	bne	3f
X	tst	r2
X	beq	9f		/ divide by 0: return lhs (already in r0,r1)
X3:
X	clr	r4		/ init scale of lhs
X2:
X	ashc	$1,r0		/ shift lhs left until a 1 bit falls out
X	blos	1f		/ check for zero at same time
X	inc	r4
X	br	2b
X1:
X	mov	r4,-(sp)	/ save scale of lhs
X	clr	r4
X2:
X	asl	r3		/ same for rhs (known to be non-zero)
X	rol	r2
X	bcs	1f
X	inc	r4		/ bump rhs scale
X	br	2b
X1:
X	sub	(sp)+,r4	/ difference in scale (rhs - lhs)
X	bpl	4f		/ rhs can be lined up under lhs - go divide
X	mov	8.(sp),r0	/ negative: lhs < rhs, so the remainder
X	mov	10.(sp),r1	/   is lhs itself
X	br	9f
X4:
X	clr	r0
X	mov	$1,r1
X	ashc	r4,r0		/ initial quotient adder
X	mov	r1,-(sp)	/ quoadder lo
X	mov	r0,-(sp)	/ quoadder hi
X	mov	12.(sp),r0	/ r0 = hi(lhs)
X	mov	14.(sp),r1	/ r1 = lo(lhs)
X	mov	16.(sp),r2	/ r2 = hi(rhs)
X	mov	18.(sp),r3	/ r3 = lo(rhs)
X
X	ashc	r4,r2		/ scale rhs up for repetitive subtraction
X	clr	r4		/ quo lo
X	clr	-(sp)		/ quo hi
Xdocmp1:
X	cmp	r2,r0		/ unsigned 32 bit compare of rhs with lhs
X	bhi	noadd1
X	blo	dosub1
X	cmp	r3,r1
X	bhi	noadd1
Xdosub1:
X	sub	r3,r1		/ lhs -= rhs (32 bits)
X	sbc	r0
X	sub	r2,r0
X	add	4(sp),r4	/ quo lo += quoadder lo
X	adc	(sp)		/ quo hi
X	add	2(sp),(sp)	/ quo hi += quoadder hi
X	br	docmp1
Xnoadd1:
X	clc			/ right shift rhs
X	ror	r2
X	ror	r3
X	clc			/ right shift quotient adder
X	ror	2(sp)
X	ror	4(sp)
X	bne	docmp1		/ quo adder not 0 means more to do
X	tst	2(sp)		/ low word is 0 - is the high word too?
X	bne	docmp1
X	add	$6,sp		/ remove quo adder and quo high
X9:
X	mov	(sp)+,r4	/ r0,r1 have remainder
X	mov	(sp)+,r3
X	mov	(sp)+,r2
X	rts	pc
X#endif KERNEL
X
X/*
X * u_long ualrem(lhs, rhs)
X *	u_long	*lhs, rhs;
X *
X * Unsigned 32-bit "%=" routine.  Calls to ualrem are generated
X * automatically by the C compiler.
X *
X * The remainder code is long enough that repeating it here makes no
X * sense.  Instead the long that lhs points at is fetched, a by-value
X * argument list for ulrem is built on the stack, ulrem is called, and
X * its result is stored back through lhs as well as being returned in
X * r0 (high word) and r1 (low word).
X */
X
X	.globl	ualrem
Xualrem:
XENTRY(ualrem)
X	mov	r2,-(sp)	/ r2 will hold the pointer; preserve it
X	mov	4(sp),r2	/ r2 = lhs (address of the long)
X	mov	8.(sp),-(sp)	/ push lo(rhs)
X	mov	8.(sp),-(sp)	/ push hi(rhs) (same offset: sp moved)
X	mov	2(r2),-(sp)	/ push lo(*lhs)
X	mov	(r2),-(sp)	/ push hi(*lhs)
X	jsr	pc,ulrem	/ r0:r1 = *lhs % rhs
X	add	$8.,sp		/ discard the four argument words
X	mov	r0,(r2)		/ *lhs = result, high word
X	mov	r1,2(r2)	/   and low word
X	mov	(sp)+,r2	/ restore r2
X	rts	pc
SHAR_EOF
fi
if test -f '/usr/src/lib/libc/pdp/crt/uldiv.s'
then
	echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/uldiv.s'"
else
sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/uldiv.s'
X/*
X * Program: uldiv.s
X * Copyright 1993, GTE Government Systems
X * Author:  Steven M. Schultz
X *
X *  Version	Date		Modification
X *	0.0	02Feb91		1. Initial inspiration struck.
X *	1.0	05Jun93		2. Released into the Public Domain.
X*/
X
X#include "DEFS.h"
X
X/*
X * All routines have both a C interface and an assembly interface.  Normally
X * the two are the same.  In the case of 'ulsh' the compiler has placed one
X * of the operands in r0 and r1 so the assembly interface differs from the
X * C interface.
X*/
X
X/*
X * u_long uldiv(lhs, rhs)
X *	u_long	lhs, rhs;
X *
X * unsigned 32-bit "/" routine.  Calls to uldiv are generated automatically 
X * by the C compiler.
X */
X
X#if !defined(KERNEL)
X/*
X * uldiv for applications (uses floating point)
X *
X * lhs is at 2(sp)/4(sp) and rhs at 6(sp)/8.(sp) (high/low word).  The
X * quotient is returned in r0 (high word) and r1 (low word).  A zero
X * divisor is not an error here: lhs is returned unchanged.  l2f and
X * l6f switch the FP unit to 'setl' and may clear bit 31 of the stacked
X * arguments; 'seti' is restored before returning.  fr0 and fr3 are used.
X *
X * The quotient can be as large as 2^32-1 (divisor of 1), which a
X * direct 'movfi' to a signed long cannot represent.  Values of 2^31
X * and up are therefore converted as (value - 2^31) and bit 31 is set
X * afterwards by hand.
X */
X#define	twogig	050000
X
X	.globl l2f, l6f
X	.globl uldiv
Xuldiv:
XENTRY(uldiv)
X	jsr	pc,l2f		/ 2(sp) -> fr0
X	jsr	pc,l6f		/ 6(sp) -> fr3
X	tstf	fr3		/ check for zero divisor
X	cfcc			/   so that there is no FP fault
X	beq	1f		/   in integer arithmetic (result is lhs)
X	divf	fr3,fr0		/ fr0 /= rhs
X1:
X	cmpf	$twogig,fr0	/ forms 2^31 - result
X	cfcc
X	bgt	2f		/ result < 2^31: fits in a signed long
X	subf	$twogig,fr0	/ else convert (result - 2^31)
X	movfi	fr0,-(sp)
X	bis	$100000,(sp)	/   and restore bit 31 in the high word
X	br	3f
X2:
X	movfi	fr0,-(sp)
X3:
X	mov	(sp)+,r0	/ return result
X	mov	(sp)+,r1
X	seti
X	rts	pc
X#else
X/*
X * uldiv for the kernel (fixed point only - no FP)
X *
X * After the three register saves the arguments are at:
X *	 8.(sp) hi(lhs)   10.(sp) lo(lhs)   12.(sp) hi(rhs)   14.(sp) lo(rhs)
X * The quotient is returned in r0 (high word) and r1 (low word); r2-r4
X * are saved and restored.
X *
X * A divisor below 2^15 is handled right here with hardware 'div'
X * instructions; anything larger is passed to slowuldiv, which comes
X * back to label 9 below to restore the registers and return.
X *
X * NOTE(review): a zero divisor is not tested for on this path (the
X * floating point version returns lhs in that case) - confirm what the
X * kernel callers expect.
X * NOTE(review): the first 'div' looks as if it could overflow when
X * lo(rhs) is 1 and hi(lhs) is >= 2^15; only the second 'div' is
X * followed by a 'bvc' - confirm against the hardware 'div' rules.
X */
X
X	.globl uldiv
Xuldiv:
XENTRY(uldiv)
X	mov	r2,-(sp)	/ faster than csv/cret ...
X	mov	r3,-(sp)
X	mov	r4,-(sp)
X	mov	14.(sp),r3	/ r3 = lo(rhs)
X	bmi	slowuldiv	/  rhs >= 2^15
X	tst	12.(sp)		/ hi(rhs) empty?
X	bne	slowuldiv	/   no, rhs >= 2^16
X
X	mov	10.(sp),r2	/ r2 = lo(lhs)
X	mov	8.(sp),r1	/ r1 = hi(lhs)
X
X	clr	r0		/ r0 = hi(lhs) / lo(rhs)
X	div	r3,r0		/ r1 = hi(lhs) % lo(rhs)
X	mov	r0,r4		/ save high quotient
X	mov	r1,-(sp)	/ stash hi(tmp)
X	mov	r1,r0		/ tmp=(hi(lhs)%lo(rhs))<<16 | lo(lhs)
X	mov	r2,r1		/ (r0:r1 = tmp)
X	div	r3,r0		/ r0 = tmp / lo(rhs)
X	bvc	3f		/ done if tmp/lo(rhs) < 2^15
X
X	mov	(sp),r0		/ reload r0:r1 with tmp (regs may be
X	mov	r2,r1		/   clobbered by failed div)
X	sub	r3,r0		/ r0:r1 -= 2^16 * lo(rhs)
X	div	r3,r0
X	tst	r1		/ if (negative) remainder, subtract one from
X	sxt	r1		/   quotient
X	add	r1,r0		/ cannot overflow!
X3:
X	tst	(sp)+		/ pop hi(tmp) off stack
X	mov	r0,r1		/ r1 (lo(quo)) = tmp / lo(rhs)
X	mov	r4,r0		/ r0 (hi(quo)) = hi(lhs) / lo(rhs)
X9:
X	mov	(sp)+,r4	/ restore registers
X	mov	(sp)+,r3
X	mov	(sp)+,r2
X	rts	pc
X
X/*
X * The divisor (rhs) is known to be >= 2^15 so we perform a shift and
X * subtract algorithm.  It's slow - feel free to improve it.
X *
X * The algorithm for signed divide broke down for unsigned operands, so a
X * slower, larger, more painful algorithm was implemented using scaling and
X * repetitive subtraction/shifting.  Works best for large numbers (fewer
X * shifts that way).
X *
X * The "scale" of an operand is its number of leading zero bits.  rhs is
X * shifted by the difference of the two scales, and a one-bit "quotient
X * adder" is shifted by the same amount.  Each time the shifted rhs can be
X * subtracted from what is left of lhs the adder is added into the
X * quotient; rhs and the adder are then shifted right one place.  The loop
X * ends when the adder has been shifted out to zero.
X */
Xslowuldiv:
X	mov	8.(sp),r0	/ r0 = hi(lhs)
X	mov	10.(sp),r1	/ r1 = lo(lhs)
X	mov	12.(sp),r2	/ r2 = hi(rhs)
X				/ r3 = lo(rhs) - already done
X
X	clr	r4		/ init scale of lhs
X2:
X	ashc	$1,r0		/ shift lhs left until a 1 bit falls out
X	blos	1f		/ check for zero at same time
X	inc	r4
X	br	2b
X1:
X	mov	r4,-(sp)	/ save scale of lhs
X	clr	r4
X2:
X	asl	r3		/ same for rhs
X	rol	r2
X	bcs	1f
X	inc	r4		/ bump rhs scale
X	br	2b
X1:
X	clr	r0
X	mov	$1,r1
X	sub	(sp)+,r4	/ difference in scale (rhs - lhs)
X	ashc	r4,r0		/ initial quotient adder
X	mov	r1,-(sp)	/ quoadder lo
X	mov	r0,-(sp)	/ quoadder hi
X	mov	12.(sp),r0	/ r0 = hi(lhs)
X	mov	14.(sp),r1	/ r1 = lo(lhs)
X	mov	16.(sp),r2	/ r2 = hi(rhs)
X	mov	18.(sp),r3	/ r3 = lo(rhs)
X
X	ashc	r4,r2		/ scale rhs up for repetitive subtraction
X	clr	r4		/ quo lo
X	clr	-(sp)		/ quo hi
Xdocmp:
X	cmp	r2,r0		/ unsigned 32 bit compare of rhs with lhs
X	bhi	noadd
X	blo	dosub
X	cmp	r3,r1
X	bhi	noadd
Xdosub:
X	sub	r3,r1		/ lhs -= rhs (32 bits)
X	sbc	r0
X	sub	r2,r0
X	add	4(sp),r4	/ quo lo += quoadder lo
X	adc	(sp)		/ quo hi
X	add	2(sp),(sp)	/ quo hi += quoadder hi
X	br	docmp
Xnoadd:
X	clc			/ right shift rhs
X	ror	r2
X	ror	r3
X	clc			/ right shift quotient adder
X	ror	2(sp)
X	ror	4(sp)
X	bne	docmp		/ quo adder not 0 means more to do
X	tst	2(sp)		/ low word is 0 - is the high word too?
X	bne	docmp
X	mov	(sp)+,r0	/ quo hi
X	mov	r4,r1		/ quo lo
X	cmp	(sp)+,(sp)+	/ remove quot adder
X	br	9b
X#endif KERNEL
X
X/*
X * u_long ualdiv(lhs, rhs)
X *	u_long	*lhs, rhs;
X *
X * Unsigned 32-bit "/=" routine.  Calls to ualdiv are generated
X * automatically by the C compiler.
X *
X * The divide code is long enough that repeating it here makes no
X * sense.  Instead the long that lhs points at is fetched, a by-value
X * argument list for uldiv is built on the stack, uldiv is called, and
X * its result is stored back through lhs as well as being returned in
X * r0 (high word) and r1 (low word).
X */
X
X	.globl	ualdiv
Xualdiv:
XENTRY(ualdiv)
X	mov	r2,-(sp)	/ r2 will hold the pointer; preserve it
X	mov	4(sp),r2	/ r2 = lhs (address of the long)
X	mov	8.(sp),-(sp)	/ push lo(rhs)
X	mov	8.(sp),-(sp)	/ push hi(rhs) (same offset: sp moved)
X	mov	2(r2),-(sp)	/ push lo(*lhs)
X	mov	(r2),-(sp)	/ push hi(*lhs)
X	jsr	pc,uldiv	/ r0:r1 = *lhs / rhs
X	add	$8.,sp		/ discard the four argument words
X	mov	r0,(r2)		/ *lhs = result, high word
X	mov	r1,2(r2)	/   and low word
X	mov	(sp)+,r2	/ restore r2
X	rts	pc
SHAR_EOF
fi
exit 0
#	End of shell archive
