| /*
* The original version of this program was provided by Robert Gries.
* Comments and features have been added to this version by Robert J. Sampson.
*
* This DEC C example demonstrates the ability to improve the speed of
* some integer divide (quotient) and modulus (remainder) operations on Alpha
* processors. For a divide operation to benefit, the divisor (unknown at
* compile time) must be known at run time, prior to being used many times.
* For a modulus operation to benefit, the associated quotient must be known.
*
* A significant speedup for in-line (vs. compiler-generated) division
* can be expected for divisors averaging greater than 512, and dividends
* averaging several times greater than the divisors. The best speed-ups
* (up to about four times) are acheived for very large operand values.
*
* The inverse and right shift must be pre-computed for predicted set(s)
* of divisors to be used many times by the application. For each divide
* operation (with some exceptions), the dividend is multiplied by the inverse,
* the right shift is applied, and (if needed) the quotient is made negative.
* In the general case, this will execute faster than the equivalent run-time
* support routine call, by which the compiler currently implements a divide
* for any variable divisor not known at compile time.
*
* The dividend can be any signed or unsigned integer. For each
* pre-selected divisor, the programmer must call a routine (uint_invert
* for an unsigned divisor, or int_invert for a signed divisor) to
* pre-compute and store the inverse and right shift values, with the
* following arguments:
*
* (1) divisor;
* (2) size of inverse argument (3) in bytes;
* (3) inverse (address to be written);
* (4) size of right shift argument (5) in bytes;
* (5) right shift (address to be written);
*
* The programmer implements an integer divide operation (for a known
* divisor) as in-line code, by invoking a macro (UINT_DIV for unsigned
* operands, or INT_DIV for signed operands), with the following arguments:
*
* (1) dividend;
* (2) divisor;
* (3) inverse;
* (4) right shift;
* (5) quotient result variable (used as the left-hand side for assignment).
*
* The programmer implements an integer modulus operation (for a known
* quotient) as in-line code, by invoking a macro (INT_MOD for either unsigned
* or signed operands), with the following arguments:
*
* (1) dividend;
* (2) divisor;
* (3) quotient;
* (4) modulus result variable (used as the left-hand side for assignment).
*
* The dividend, divisor, right shift, and quotient can be stored in any
* of the integer sizes; 1, 2, 4, or 8 bytes. The integer size specified for
* the inverse determines the allowable range of dividends and divisors. The
* programmer is responsible for declaring all storage arrays and variables
* with types large enough to represent every possible value to be stored,
* and for performing any necessary type casts and/or conversions.
*
* Use of compact storage can improve cache speedups. However, it can
* also hurt performance, especially on older Alpha processors and compilers.
*
* Starting with the EV56 Alpha processor, byte and word, load and store
* instructions are implemented in hardware. Starting with the DEC C V5.5
* compiler, specifying /ARCHITECTURE=EV56 enables their generation.
* Unless emulation is provided, the resulting program will fail on older
* Alpha processors. This emulation is provided by OpenVMS V7.1, Digital
* UNIX V4.0, Windows NT V4.0, and later versions of these operating systems
* on Alpha. However, each emulated instruction is likely to execute hundreds
* of times more slowly than would its processor hardware implementation.
*/
/*
* Define the types (and therefore the storage sizes and ranges of values)
* of inverse, right shift, and signed and unsigned operand values.
*
* INVERSE_TYPE should always be unsigned, and its size should always be
* at least as large as the largest of the signed and unsigned dividend
* and divisor types.
*
* RTSHIFT_TYPE should always be signed. Its size should be selected
* for fast access by the target Alpha processor. For example, int32 is
* appropriate for EV4, EV5, and generic targets, but int8 is appropriate
* for EV56 and newer targets when using byte/word load/store instructions.
*
* USE_MULQ must be defined as false (zero) when INVERSE_TYPE is larger
* than four bytes (32 bits). Otherwise, USE_MULQ may be defined as true
* (non-zero), but it doesn't have to be.
*/
#define INVERSE_TYPE uint64
#define USE_MULQ 0
#define RTSHIFT_TYPE int32
#define SIGNED_DVD_TYPE int64
#define UNSIGNED_DVD_TYPE uint64
#define SIGNED_DVR_TYPE int64
#define UNSIGNED_DVR_TYPE uint64
#define SIGNED_QUO_TYPE int64
#define UNSIGNED_QUO_TYPE uint64
#define SIGNED_MOD_TYPE SIGNED_DVR_TYPE
#define UNSIGNED_MOD_TYPE UNSIGNED_DVR_TYPE
/*
* The SEED_INT_RAN macro initializes the random number generator seed.
*/
#if __VMS && (__VMS_VER < 70000000)
#define SEED_INT_RAN(seedvar) \
srand((unsigned int)seedvar);
#else
#define SEED_INT_RAN(seedvar) \
seed48((unsigned short*)&seedvar);
#endif
/*
* The UINT_RAN macro generates an unsigned pseudo-random integer value.
* The number of significant bits is varied according to index,
* within the limits imposed by minbits and maxbits.
*/
#define UINT_RAN(ranval, index, minbits, maxbits) \
{ \
register uint64 nbits; \
register uint64 bitmask; \
ranval = mrand48(); \
ranval <<= 32; \
ranval ^= mrand48(); /* generate 64 random bits */ \
nbits = maxbits; \
nbits -= minbits; \
nbits++; \
nbits = index % nbits; \
nbits += minbits; /* nbits range: minbits thru maxbits */ \
if (nbits < (8 * sizeof(ranval))) \
{ \
bitmask = 1uL; \
bitmask <<= nbits; \
bitmask--; \
ranval &= bitmask; /* clear bits above nbits */ \
} \
nbits--; \
bitmask = 1uL; \
bitmask <<= nbits; \
ranval |= bitmask; /* set most-significant bit */ \
}
/*
* The INT_RAN macro generates a signed pseudo-random integer value.
* In order to reserve the most-significant bit for the sign,
* special processing may be needed.
*/
#define INT_RAN(ranval, index, minbits, maxbits) \
{ \
UINT_RAN(ranval, index, minbits, maxbits) \
if (maxbits == (8 * sizeof(ranval))) \
{ \
register uint64 bitmask; \
bitmask = 1uL; \
bitmask <<= maxbits - 1; \
bitmask = ~bitmask; \
ranval &= bitmask; /* clear reserved sign bit */ \
bitmask = 1uL; \
bitmask <<= minbits - 1; \
if (minbits == maxbits) bitmask >>= 1; \
ranval |= bitmask; /* set minimum bit */ \
} \
if (index & 1) ranval = -ranval; \
}
/*
* The UMULH_AND_SHIFT macro combines the UMULH and right shift operations.
* If the specified right shift is negative, special processing is done.
*/
#define UMULH_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient) \
{ \
register uint64 tmp_udividend = (uint64)udividend; \
register int32 tmp_rtshift = (int32)rtshift; \
do \
{ \
if (tmp_rtshift < 0) \
{ \
tmp_rtshift = -tmp_rtshift; \
if (tmp_rtshift == ((8 * sizeof(inverse)) - 1)) \
{ \
tmp_udividend = (tmp_udividend >= (uint64)udivisor); \
tmp_udividend <<= tmp_rtshift; \
break; \
} \
else \
{ \
tmp_udividend++; \
if (!tmp_udividend) \
{ \
tmp_udividend = (uint64)inverse \
<< (8 * (sizeof(uint64) - sizeof(inverse))); \
break; \
} \
} \
} \
tmp_udividend = (uint64)asm("umulh %a0, %a1, %v0", tmp_udividend, \
(uint64)inverse << (8 * (sizeof(uint64) - sizeof(inverse)))); \
} \
while (0); \
quotient = tmp_udividend >> tmp_rtshift; \
}
/*
* The MULQ_AND_SHIFT macro combines the MULQ and right shift operations.
* If the specified right shift is negative, special processing is done.
*/
#define MULQ_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient) \
{ \
register uint64 tmp_udividend = (uint64)udividend; \
register int32 tmp_rtshift = (int32)rtshift; \
do \
{ \
if (tmp_rtshift < 0) \
{ \
tmp_rtshift = -tmp_rtshift; \
if (tmp_rtshift == ((8 * sizeof(inverse)) - 1)) \
{ \
tmp_udividend = (tmp_udividend >= (uint64)udivisor); \
tmp_udividend <<= tmp_rtshift; \
break; \
} \
else \
{ \
tmp_udividend++; \
if (!tmp_udividend) \
{ \
tmp_udividend = (uint64)inverse; \
break; \
} \
} \
} \
tmp_udividend *= (uint64)inverse; \
} \
while (0); \
quotient = tmp_udividend >> tmp_rtshift; \
}
/*
* The MULTIPLY_AND_SHIFT macro is selected as a synonym for either the
* MULQ_AND_SHIFT macro or the UMULH_AND_SHIFT macro, depending on the
* USE_MULQ macro, which may be defined as true (non-zero), only if the
* INVERSE_TYPE macro defines a type no larger than four bytes (32 bits).
*/
#if USE_MULQ
#define MULTIPLY_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient) \
MULQ_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient)
#else
#define MULTIPLY_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient) \
UMULH_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient)
#endif
/*
* The CHECK_DIVISOR macro allows the use of divisors zero and one.
*
* When this option is selected (by defining the macro with a non-zero value),
* the divisor is checked at divide time. Performance is usually somewhat
* decreased, but the correct quotient is always obtained for divisor one,
* and the expected (fatal) divide-by-zero fault occurs for divisor zero.
*
* When this option is declined (by defining the macro with a value of zero),
* the divisor is not checked at divide time. Performance is usually somewhat
* increased, but quotients are usually incorrect for divisor one, and divisor
* zero does not cause a divide-by-zero fault.
*
* When called with divisor zero, invert routines return failure status.
* When called with divisor one, invert routines return the specified
* CHECK_DIVISOR value.
*/
#ifndef CHECK_DIVISOR
#define CHECK_DIVISOR 0
#endif
/*
* UINT_DIV is the unsigned integer divide macro.
*/
#if CHECK_DIVISOR
#define UINT_DIV(udividend, udivisor, inverse, rtshift, quotient) \
if ((uint64)udivisor > 512) \
MULTIPLY_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient) \
else \
quotient = (uint64)udividend / (uint64)udivisor;
#else
#define UINT_DIV(udividend, udivisor, inverse, rtshift, quotient) \
MULTIPLY_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient)
#endif
/*
* INT_DIV is the signed integer divide macro.
*/
#if CHECK_DIVISOR
#define INT_DIV(dividend, divisor, inverse, rtshift, quotient) \
{ \
register uint64 neg_divisor = (divisor < 0); \
register uint64 udivisor = neg_divisor ? -divisor : divisor; \
if (udivisor > 512) \
{ \
register uint64 neg_dividend = (dividend < 0); \
register uint64 udividend = neg_dividend ? -dividend : dividend; \
register uint64 neg_quotient = (neg_dividend ^ neg_divisor); \
MULTIPLY_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient) \
if (neg_quotient) quotient = -quotient; \
} \
else \
quotient = (int64)dividend / (int64)divisor; \
}
#else
#define INT_DIV(dividend, divisor, inverse, rtshift, quotient) \
{ \
register uint64 neg_divisor = (divisor < 0); \
register uint64 udivisor = neg_divisor ? -divisor : divisor; \
register uint64 neg_dividend = (dividend < 0); \
register uint64 udividend = neg_dividend ? -dividend : dividend; \
register uint64 neg_quotient = (neg_dividend ^ neg_divisor); \
MULTIPLY_AND_SHIFT(udividend, udivisor, inverse, rtshift, quotient) \
if (neg_quotient) quotient = -quotient; \
}
#endif
/*
* INT_MOD is the integer modulus (remainder) macro.
* The quotient must have already been computed.
*
* The DEC C V5.5 compiler apparently does not exploit modulus operations
* for which the quotient is already known. In this case, INT_MOD
* may perform significantly better than the modulus (%) operator.
*/
#define INT_MOD(dividend, divisor, quotient, modulus) \
modulus = dividend - (divisor * quotient);
#include <c_asm.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if __VMS
#include <builtins.h>
#include <ints.h>
#include <jpidef.h>
#include <lib$routines.h>
#include <ssdef.h>
#include <starlet.h>
#if __VMS_VER < 70000000
#define mrand48 rand
#endif
#else
#include <time.h>
#define int8 signed char
#define int16 signed short int
#define int32 signed int
#define int64 signed long int
#define uint8 unsigned char
#define uint16 unsigned short int
#define uint32 unsigned int
#define uint64 unsigned long int
#endif
#if __VMS
#pragma extern_model save
#pragma extern_model strict_refdef
extern const uint32 MMG$GL_PAGE_SIZE;
#pragma extern_model restore
/*
* Perform an operation on a range of pages. The specified service
* can lock pages into (or unlock pages from) process working set
* (or physical memory), delete pages, or set protection on pages,
* using sys$lkwset, sys$ulwset, sys$lckpag, sys$ulkpag, sys$deltva,
* or sys$setprt. The service is always called with five arguments.
* The first three (inadr, retadr, acmode) are recognized by all of
* the above services. Only sys$setprt recognizes the last two
* (prot, prvprt). The acmode argument is specified as a zero;
* the service interprets this as the access mode of the caller.
* The prvprt argument is specified as a zero pointer. This routine
* works only for virtual address ranges in P0 and P1; not P2, S0, S1,
* or any other process or system region.
*/
int on_range_of_pages(
int pservice(),
const char *range[2],
const uint32 prot)
{
register int status;
const register uint32 page_mask = ~(MMG$GL_PAGE_SIZE-1);
char *inadr[2], *retadr[2];
int32 trying, success;
inadr[0] = (char*)range[0];
inadr[1] = (char*)range[1];
do
{
status = (pservice)(inadr,retadr,0,prot,0);
success = (retadr[0] != (char*)0xFFFFFFFF);
trying = 0;
if (!(status & 1)) /* service did not reach end of range */
{
if ((status != SS$_LKWSETFUL)
&& (status != SS$_LCKPAGFUL)
&& (status != SS$_IVPROTECT))
{
trying = (inadr[0]
< (char*)(((uint32)range[1]) & page_mask));
if (trying) /* still haven't covered the entire range */
{
if (success) /* continue on the next page */
inadr[0] = retadr[1] + 1 + MMG$GL_PAGE_SIZE;
else /* skip the failed first page */
inadr[0] += MMG$GL_PAGE_SIZE;
}
else /* entire range has been covered */
status = SS$_NORMAL;
} /* lock limit not reached */
} /* complete range not covered yet */
} /* keep trying until done or lock limit reached */
while (trying);
return status;
} /* on_range_of_pages() */
/*
* Attempt to perform the specified service on all process pages.
* If the operation works on all eligible pages, consider it successful.
*/
int on_all_process_pages(int pservice())
{
register int status, p0sts, p1sts;
static const char *p0range[2] = {(char*)0x00000000, (char*)0x3FFFFFFF};
static const char *p1range[2] = {(char*)0x40000000, (char*)0x7FFFFFFF};
char *frep0va, *frep1va;
char *p0inadr[2], *p1inadr[2];
char *p0retadr[2], *p1retadr[2];
int32 jpiosb[2];
struct
{
struct
{
uint16 buflen;
uint16 itmcod;
char **bufadr;
uint16 *retlen;
} frep0va;
struct
{
uint16 buflen;
uint16 itmcod;
char **bufadr;
uint16 *retlen;
} frep1va;
uint32 listend;
} jpilst = {{sizeof(frep0va), JPI$_FREP0VA, 0, 0},
{sizeof(frep1va), JPI$_FREP1VA, 0, 0}, 0};
jpilst.frep0va.bufadr = &frep0va;
jpilst.frep1va.bufadr = &frep1va;
status = sys$getjpiw(0,0,0,&jpilst,jpiosb,0,0);
if (status & 1) status = jpiosb[0];
if (!(status & 1)) return status;
p0inadr[0] = (char*)p0range[0];
p0inadr[1] = frep0va - 1; /* last byte on previous page */
p1inadr[0] = frep1va + MMG$GL_PAGE_SIZE; /* first byte on next page */
p1inadr[1] = (char*)p1range[1];
p0sts = on_range_of_pages(pservice, (const char**)p0inadr, 0);
status = p0sts;
p1sts = on_range_of_pages(pservice, (const char**)p1inadr, 0);
if ((status & 1) && !(p1sts & 1)) status = p1sts;
return status;
} /* on_all_process_pages() */
#endif
/*
* Provide a table of the base-two logarithms for integers 1 thru 255.
*/
const static int32 small_log2_table[256] = {
-1,
0,
1,1,
2,2,2,2,
3,3,3,3,3,3,3,3,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 };
/* Compute the base-two logarithm of an unsigned integer,
* using the Alpha-specific CMPBGE and EXTBL instructions.
*/
int32 log_base_two(uint64 iarg)
{
register int32 jbits, klog2;
/*
* If the argument value is small enough, the logarithm can be accessed
* directly from the table. This special case may or may not improve
* the overall average performance of this routine for a particular
* application. It can be removed if desired.
*/
if (iarg < sizeof(small_log2_table)/sizeof(small_log2_table[0]))
return small_log2_table[iarg];
/*
* Check all eight bytes of iarg at once, to determine which are zero.
* The CMPBGE Alpha instruction does eight parallel unsigned byte comparisons
* between corresponding bytes of the first and second operands, and stores
* eight result bits, each of which is set only if the corresponding byte of
* the first operand is greater than or equal to the corresponding byte of
* the second operand. The first operand is R31; always read as zero.
*/
jbits = asm("cmpbge %r31, %a0, %v0", iarg);
/*
* Flip the eight bits in jbits with an exclusive OR operation,
* so that each set bit corresponds to a non-zero byte in iarg.
*/
jbits ^= 0xFF;
/*
* Use jbits to select the value from the table, which gives the byte number
* (range 0 thru 7) of the most-significant non-zero byte in iarg.
*/
klog2 = small_log2_table[jbits];
/*
* Use the EXTBL Alpha instruction to extract (into jbits) the
* most-significant non-zero byte of iarg, using byte number klog2.
*/
jbits = asm("extbl %a0, %a1, %v0", iarg, klog2);
/*
* Compute the base-two logarithm of iarg as the sum of two components.
* The first component is the bit offset of the most-significant non-zero byte.
* Convert the byte number klog2 (range 0 thru 7 step 1) to the bit offset
* (range 0 thru 56 step 8) of that byte. The second component is the base-two
* logarithm of the byte value. Select this directly from the table. This is
* the "slide rule" principle: Let "a" and "b" represent the two components.
* The logarithm of their product is equal to the sum of their logarithms:
*
* log2(a * b) = log2(a) + log2(b)
*/
return (klog2 << 3) + small_log2_table[jbits];
} /* log_base_two() */
/*
* Generate the largest unsigned integer that fits
* into the specified # of bytes, and/or bits.
*/
uint64 max_uint_value(uint32 nbytes, uint32 nbits)
{
register uint64 ulargest, bitmask, zapbytes;
register uint32 i;
ulargest = 0uL;
ulargest = ~ulargest;
zapbytes = 0uL;
if (nbytes)
{
for (i = nbytes; i < sizeof(uint64); i++)
{
bitmask = 1uL;
bitmask <<= i;
zapbytes |= bitmask;
}
}
ulargest = (uint64)asm("zap %a0, %a1, %v0", ulargest, zapbytes);
if (nbits && ((!nbytes) || (nbits < (8*nbytes))))
{
for (i = nbits; i < 8*sizeof(uint64); i++)
{
bitmask = 1uL;
bitmask <<= i;
bitmask = ~bitmask;
ulargest &= bitmask;
}
}
return ulargest;
} /* max_uint_value() */
/*
* Invert an unsigned integer divisor value.
*/
int32 uint_invert(
uint64 udivisor,
uint32 ninvbytes,
void *pinverse,
uint32 nrtsbytes,
void *prtshift)
{
register uint64 inverse, remainder, udividend, ulargest;
register uint32 nbits, topbit;
register int32 rtshift;
uint64 mem_inverse;
int64 mem_rtshift;
/*
* Clear the bytes of the inverse argument.
*/
memset(pinverse, 0, ninvbytes);
/*
* Initialize the bytes of the right shift argument.
*/
memset(prtshift, -1, nrtsbytes);
/*
* Division by zero is undefined, and should be avoided.
* Always return failure status when the divisor is zero.
*/
if (udivisor == 0uL) return 0;
/*
* Initialize right shift to negative one, to offset a left shift of 63 bits
* (from the largest possible dividend that can be used at invert time below),
* followed by a right shift of 64 bits (from the UMULH instruction that can
* be used at divide time).
*/
rtshift = -1;
/*
* For inverse sizes of four bytes (32 bits) or less, the MULQ instruction
* can optionally be used at divide time. This requires an increased right
* shift. Larger inverse sizes require the use of the UMULH instruction.
*/
#if USE_MULQ
if (ninvbytes <= sizeof(uint32))
rtshift += 8 * ninvbytes;
else
return 0;
#endif
/*
* Generate the largest unsigned integer value that fits into the inverse size.
*/
ulargest = max_uint_value(ninvbytes, 0);
/*
* Mask the divisor, restricting its significant bits to the inverse size.
*/
udivisor &= ulargest;
/*
* Determine the most-significant bit number for the inverse.
* Use (as the dividend) the largest power of two that fits into inverse size.
*/
topbit = (sizeof(uint64) < ninvbytes) ? sizeof(uint64) : ninvbytes;
topbit *= 8;
topbit--;
udividend = 1uL;
udividend <<= topbit;
/*
* Divide selected dividend by divisor. Also compute remainder.
*/
inverse = udividend / udivisor;
remainder = udividend % udivisor;
/*
* Use remainder to calculate additional inverse bits,
* while the inverse still fits into its storage space.
*/
while (inverse < udividend)
{
nbits = topbit;
nbits -= (remainder > inverse) ?
log_base_two(remainder) :
log_base_two(inverse);
if (nbits)
{
rtshift += nbits;
remainder <<= nbits;
inverse <<= nbits;
inverse += remainder / udivisor;
remainder %= udivisor;
}
else
{
rtshift++;
remainder += remainder;
remainder -= udivisor;
inverse += inverse;
inverse++;
}
}
/*
* If double the remainder is greater than the divisor,
* then increment the *inverse* *now* (at invert time).
*/
if ((remainder + remainder) > udivisor)
inverse++;
/*
* Otherwise, if the remainder is non-zero, or in the special case of the
* largest unsigned divisor that fits into inverse size, negate the right
* shift to indicate that the *dividend* should be incremented *later*
* (at divide time).
*/
else if (remainder || (udivisor == ulargest))
rtshift = -rtshift;
/*
* Store the (unsigned) inverse argument.
* All bytes were previously cleared.
*/
if (ninvbytes > sizeof(uint64)) ninvbytes = sizeof(uint64);
mem_inverse = inverse;
memcpy(pinverse, (const void*)&mem_inverse, ninvbytes);
/*
* If the right shift is now non-negative, then the sign bits must be cleared.
*/
if (rtshift >= 0)
memset(prtshift, 0, nrtsbytes);
/*
* Store the right shift argument.
*/
if (nrtsbytes > sizeof(int64)) nrtsbytes = sizeof(int64);
mem_rtshift = rtshift;
memcpy(prtshift, (const void*)&mem_rtshift, nrtsbytes);
/*
* Division by one is detected only if the CHECK_DIVISOR macro is true
* (non-zero) at compile time. Otherwise, the sign of the quotient
* may be wrong, and its least-significant bit of the quotient (if set
* in the dividend) is dropped.
*/
return (udivisor == 1uL) ? CHECK_DIVISOR : 1;
} /* uint_invert() */
/*
* Invert a signed integer divisor.
*/
int32 int_invert(
int64 divisor,
uint32 ninvbytes,
void *pinverse,
uint32 nrtsbytes,
void *prtshift)
{
register uint64 udivisor;
/*
* Convert the divisor to an unsigned (positive) value. If either
* the dividend or the divisor (but not both) is negative, the quotient
* is made negative after the unsigned division operation is done.
*/
udivisor = (divisor < 0L) ? -divisor : divisor;
/*
* Now call the unsigned invert routine to do the actual work.
*/
return uint_invert(udivisor, ninvbytes, pinverse, nrtsbytes, prtshift);
} /* int_invert() */
/*
* This is the main program, showing the use of the above macros and routines.
*/
#if __VMS
int alpha_integer_divide(int argc, char *argv[])
#pragma nostandard
main_program
#pragma standard
#else
int main(int argc, char *argv[])
#endif
{
#define NDIVISORS 1024
#define NDIVIDENDS 4096
#define MAXHST 100
register uint64 uabove, ubelow, nunique, nsigned;
register int64 sumdifquo, difquo;
register int64 sumdifmod, difmod;
register int64 sumcmpquo, sumgriquo;
register int64 sumcmpmod, sumgrimod;
register SIGNED_DVR_TYPE a_divisor;
register SIGNED_DVR_TYPE minsgndvr, maxsgndvr;
register SIGNED_DVR_TYPE mindvr, maxdvr;
register SIGNED_DVR_TYPE negdvrnrzero, posdvrnrzero;
register SIGNED_DVD_TYPE a_dividend;
register SIGNED_DVD_TYPE mindvd, maxdvd;
register SIGNED_DVD_TYPE negdvdnrzero, posdvdnrzero;
register SIGNED_QUO_TYPE cmpquo, griquo;
register SIGNED_MOD_TYPE cmpmod, grimod;
register uint32 nbadquo, nbadmod;
register int mindvdbits, maxdvdbits;
register int mindvrbits, maxdvrbits;
register int32 i, j, k;
int64 bintim;
SIGNED_DVR_TYPE divisor[NDIVISORS];
INVERSE_TYPE inverse[NDIVISORS];
RTSHIFT_TYPE rtshift[NDIVISORS];
uint32 negdiv[NDIVISORS];
uint32 posdiv[NDIVISORS];
uint32 neghst[MAXHST];
uint32 poshst[MAXHST];
#if __VMS
register int status;
uint32 msgvec[2] = {1,0};
#else
clock_t sclk, fclk;
#endif
/*
* Attempt to lock as many user-owned process pages
* as possible into the working set.
*/
#if __VMS
status = on_all_process_pages(&sys$lkwset);
if (!(status & 1))
{
msgvec[1] = status;
printf("Failed to lock user-owned pages into working set;\n");
sys$putmsg(msgvec, 0, 0, 0);
}
#endif
/*
* Attempt to disable process swapping.
*/
#if __VMS
status = sys$setswm(1);
if (!(status & 1))
{
msgvec[1] = status;
printf("Failed to disable swapping;\n");
sys$putmsg(msgvec, 0, 0, 0);
}
#endif
/*
* Fetch and announce the minimum and maximum significant bit counts.
*/
mindvdbits = 0;
maxdvdbits = 0;
if (argc > 1)
mindvdbits = atoi(argv[1]);
if (argc > 2)
maxdvdbits = atoi(argv[2]);
if (mindvdbits < 2)
mindvdbits = 2;
else if (mindvdbits > (8 * sizeof(SIGNED_DVD_TYPE)))
mindvdbits = 8 * sizeof(SIGNED_DVD_TYPE);
if ((maxdvdbits < mindvdbits)
|| (maxdvdbits > (8 * sizeof(SIGNED_DVD_TYPE))))
maxdvdbits = 8 * sizeof(SIGNED_DVD_TYPE);
mindvrbits = mindvdbits;
maxdvrbits = maxdvdbits;
if (argc > 3)
mindvrbits = atoi(argv[3]);
if (argc > 4)
maxdvrbits = atoi(argv[4]);
if (mindvrbits < 2)
mindvrbits = 2;
else if (mindvrbits > (8 * sizeof(SIGNED_DVR_TYPE)))
mindvrbits = 8 * sizeof(SIGNED_DVR_TYPE);
if ((maxdvrbits < mindvrbits)
|| (maxdvrbits > (8 * sizeof(SIGNED_DVR_TYPE))))
maxdvrbits = 8 * sizeof(SIGNED_DVR_TYPE);
printf(
"Dividends should have at least %d, but no more than %d, significant bits.\n",
mindvdbits, maxdvdbits);
printf(
"Divisors should have at least %d, but no more than %d, significant bits.\n",
mindvrbits, maxdvrbits);
/*
* Announce the compiled storage sizes.
*/
printf("Inverse size: %u bits.\n", 8*sizeof(INVERSE_TYPE));
#if USE_MULQ
if (sizeof(INVERSE_TYPE) > sizeof(uint32))
printf("USE_MULQ not valid for sizeof(INVERSE_TYPE) > sizeof(uint32).\n");
else
printf("Using MULQ (not UMULH) instruction.\n");
#else
printf("Using UMULH (not MULQ) instruction.\n");
#endif
printf("Right shift size: %u bits.\n", 8*sizeof(RTSHIFT_TYPE));
printf("Dividend size: %u/%u bits (signed/unsigned).\n",
8*sizeof(SIGNED_DVD_TYPE), 8*sizeof(UNSIGNED_DVD_TYPE));
printf("Divisor size: %u/%u bits (signed/unsigned).\n",
8*sizeof(SIGNED_DVR_TYPE), 8*sizeof(UNSIGNED_DVR_TYPE));
printf("Quotient size: %u/%u bits (signed/unsigned).\n",
8*sizeof(SIGNED_QUO_TYPE), 8*sizeof(UNSIGNED_QUO_TYPE));
printf("Modulus size: %u/%u bits (signed/unsigned).\n",
8*sizeof(SIGNED_MOD_TYPE), 8*sizeof(UNSIGNED_MOD_TYPE));
/*
* Generate the largest unsigned integer divisor for maximum bits.
* Generate the largest unsigned integer divisor for less than minimum bits.
* Get the numbers of possible unique unsigned and signed divisor values.
* Compute the smallest and largest signed divisor values.
*/
uabove = max_uint_value(sizeof(SIGNED_DVR_TYPE), maxdvrbits);
ubelow = max_uint_value(sizeof(SIGNED_DVR_TYPE), mindvrbits-1);
nunique = uabove - ubelow;
if (maxdvrbits < (8 * sizeof(SIGNED_DVR_TYPE)))
{
nsigned = 2uL * nunique;
maxsgndvr = uabove;
minsgndvr = -maxsgndvr;
}
else
{
nsigned = nunique;
a_divisor = 1;
a_divisor <<= (8 * sizeof(SIGNED_DVR_TYPE)) - 1;
minsgndvr = a_divisor;
a_divisor--;
maxsgndvr = a_divisor;
}
/*
* Generate a 48-bit value for the initial random number seed.
*/
#if __VMS
sys$gettim(&bintim);
bintim >>= 5;
#else
bintim = -1L;
#endif
/*
* Pre-select some signed divisors. Get their inverse and right shift values.
* Ensure as many unique divisor values as possible.
*/
negdvrnrzero = maxdvr = minsgndvr;
posdvrnrzero = mindvr = maxsgndvr;
printf("Selecting %u signed divisors from %Lu unique values...\n",
NDIVISORS, nsigned);
if (nsigned <= NDIVISORS)
{
a_divisor = minsgndvr;
for (k=0; k<NDIVISORS; k++)
{
while (!int_invert(a_divisor,
sizeof(inverse[k]), &inverse[k],
sizeof(rtshift[k]), &rtshift[k]))
{
a_divisor++;
if (a_divisor < 0)
{
if (a_divisor >= -ubelow) a_divisor = ubelow + 1;
}
else
{
if (a_divisor > uabove) a_divisor = minsgndvr;
}
}
divisor[k] = a_divisor;
if (a_divisor < mindvr) mindvr = a_divisor;
if (a_divisor > maxdvr) maxdvr = a_divisor;
if (a_divisor < 0)
{
if (a_divisor > negdvrnrzero) negdvrnrzero = a_divisor;
}
else
{
if (a_divisor < posdvrnrzero) posdvrnrzero = a_divisor;
}
a_divisor++;
if (a_divisor < 0)
{
if (a_divisor >= -ubelow) a_divisor = ubelow + 1;
}
else
{
if (a_divisor > uabove) a_divisor = minsgndvr;
}
}
}
else
{
SEED_INT_RAN(bintim)
for (k=0; k<NDIVISORS; k++)
{
do INT_RAN(a_divisor,k,mindvrbits,maxdvrbits)
while (!int_invert(a_divisor,
sizeof(inverse[k]), &inverse[k],
sizeof(rtshift[k]), &rtshift[k]));
i = 0;
while (i < k)
{
if (divisor[i] == a_divisor)
{
do a_divisor++;
while (!int_invert(a_divisor,
sizeof(inverse[k]), &inverse[k],
sizeof(rtshift[k]), &rtshift[k]));
i = 0;
}
else
i++;
}
divisor[k] = a_divisor;
if (a_divisor < mindvr) mindvr = a_divisor;
if (a_divisor > maxdvr) maxdvr = a_divisor;
if (a_divisor < 0)
{
if (a_divisor > negdvrnrzero) negdvrnrzero = a_divisor;
}
else
{
if (a_divisor < posdvrnrzero) posdvrnrzero = a_divisor;
}
}
}
printf("Minimum signed divisor %016LX (%+Ld).\n",
(int64)mindvr, (int64)mindvr);
printf("Negative dvr nearest zero %016LX (%+Ld).\n",
(int64)negdvrnrzero, (int64)negdvrnrzero);
printf("Positive dvr nearest zero %016LX (%+Ld).\n",
(int64)posdvrnrzero, (int64)posdvrnrzero);
printf("Maximum signed divisor %016LX (%+Ld).\n",
(int64)maxdvr, (int64)maxdvr);
/*
* Initialize the histogram arrays.
*/
for (i = 0; i < MAXHST; i++) poshst[i] = neghst[i] = 0;
for (i = 0; i < NDIVISORS; i++) posdiv[i] = negdiv[i] = 0;
/*
* Perform signed divides, both in the compiler-selected way,
* and in the Robert Gries way. Compare quotient and modulus values.
*/
nbadquo = nbadmod = 0;
sumdifquo = sumdifmod = 0;
sumcmpquo = sumcmpmod = 0;
sumgriquo = sumgrimod = 0;
a_dividend = 1;
a_dividend <<= (8 * sizeof(SIGNED_DVD_TYPE)) - 1;
negdvdnrzero = maxdvd = a_dividend;
a_dividend--;
posdvdnrzero = mindvd = a_dividend;
SEED_INT_RAN(bintim)
printf("Performing %u signed divides both ways...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
INT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
if (a_dividend < mindvd) mindvd = a_dividend;
if (a_dividend > maxdvd) maxdvd = a_dividend;
if (a_dividend < 0)
{
if (a_dividend > negdvdnrzero) negdvdnrzero = a_dividend;
}
else
{
if (a_dividend < posdvdnrzero) posdvdnrzero = a_dividend;
}
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
cmpquo = a_dividend / a_divisor;
cmpmod = a_dividend % a_divisor;
INT_DIV(a_dividend,a_divisor,inverse[k],rtshift[k],griquo)
INT_MOD(a_dividend,a_divisor,griquo,grimod)
sumcmpquo += cmpquo;
sumcmpmod += cmpmod;
sumgriquo += griquo;
sumgrimod += grimod;
difquo = griquo - cmpquo;
difmod = grimod - cmpmod;
sumdifquo += difquo;
sumdifmod += difmod;
if (difquo || difmod)
{
if (difquo) nbadquo++;
if (difmod) nbadmod++;
printf("\n");
printf("Dividend #%4d = %016LX (%+Ld)\n", j+1,
(int64)a_dividend, (int64)a_dividend);
printf("Divisor #%4d = %016LX (%+Ld)\n", k+1,
(int64)a_divisor, (int64)a_divisor);
printf("Inverse = %016LX (%Lu)\n",
(uint64)inverse[k], (uint64)inverse[k]);
printf("UMULH and MULQ = %016LX %016LX\n",
(uint64)asm("umulh %a0, %a1, %v0", (uint64)a_dividend,
(uint64)inverse[k] << (8*(sizeof(uint64)-sizeof(inverse[k])))),
(uint64)a_dividend * (uint64)inverse[k]);
printf("Right Shift = %+d\n", (int32)rtshift[k]);
printf("Compiler Quo. = %016LX (%+Ld)\n",
(int64)cmpquo, (int64)cmpquo);
printf("Compiler Mod. = %016LX (%+Ld)\n",
(int64)cmpmod, (int64)cmpmod);
printf("Gries Quotient = %016LX (%+Ld)\n",
(int64)griquo, (int64)griquo);
printf("Gries Modulus = %016LX (%+Ld)\n",
(int64)grimod, (int64)grimod);
}
if (difquo <= -MAXHST)
neghst[MAXHST-1]++;
else if (difquo >= MAXHST)
poshst[MAXHST-1]++;
else if (difquo < 0)
neghst[-difquo]++;
else
poshst[difquo]++;
if (difquo < 0)
negdiv[k]++;
else if (difquo > 0)
posdiv[k]++;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u compiler signed divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpquo);
printf("...for %u compiler signed modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpmod);
printf("...for %u Gries signed divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
printf("...for %u Gries signed modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgrimod);
printf("Minimum signed dividend %016LX (%+Ld).\n",
(int64)mindvd, (int64)mindvd);
printf("Negative dvd nearest zero %016LX (%+Ld).\n",
(int64)negdvdnrzero, (int64)negdvdnrzero);
printf("Positive dvd nearest zero %016LX (%+Ld).\n",
(int64)posdvdnrzero, (int64)posdvdnrzero);
printf("Maximum signed dividend %016LX (%+Ld).\n",
(int64)maxdvd, (int64)maxdvd);
if (nbadquo || nbadmod)
{
printf("Sum of quotient differences = %+Ld; %u incorrect quotients.\n",
sumdifquo, nbadquo);
printf("Sum of modulus differences = %+Ld; %u incorrect modulii.\n",
sumdifmod, nbadmod);
printf("Histogram of quotient differences:\n");
for (i = MAXHST-1; i > 0; i--)
if (neghst[i])
printf("%+02d: %u\n", -i, neghst[i]);
for (i = 0; i < MAXHST; i++)
if (poshst[i])
printf("%+02d: %u\n", i, poshst[i]);
for (i = 0; i < NDIVISORS; i++)
if (negdiv[i] || posdiv[i])
printf("divisor #%d: %u smaller, %u larger than correct quotient.\n",
i+1, negdiv[i], posdiv[i]);
}
/*
* Do a timing run for signed compiler divides only.
*/
sumcmpquo = 0;
SEED_INT_RAN(bintim)
printf("Performing %u compiler signed divides...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
INT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
cmpquo = a_dividend / a_divisor;
sumcmpquo += cmpquo;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u compiler signed divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpquo);
/*
* Do a timing run for signed compiler quotient and modulus.
*/
sumcmpquo = 0;
sumcmpmod = 0;
SEED_INT_RAN(bintim)
printf("Performing %u compiler signed quotient and modulus...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
INT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
cmpquo = a_dividend / a_divisor;
cmpmod = a_dividend % a_divisor;
sumcmpquo += cmpquo;
sumcmpmod += cmpmod;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u compiler signed divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpquo);
printf("...for %u compiler signed modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpmod);
/*
* Do a timing run for signed Gries divides only.
*/
sumgriquo = 0;
SEED_INT_RAN(bintim)
printf("Performing %u Gries signed divides...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
INT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
INT_DIV(a_dividend,a_divisor,inverse[k],rtshift[k],griquo)
sumgriquo += griquo;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u Gries signed divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
/*
* Do a timing run for signed Gries quotient and modulus.
*/
sumgriquo = 0;
sumgrimod = 0;
SEED_INT_RAN(bintim)
printf("Performing %u Gries signed quotient and modulus...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
INT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
INT_DIV(a_dividend,a_divisor,inverse[k],rtshift[k],griquo)
INT_MOD(a_dividend,a_divisor,griquo,grimod)
sumgriquo += griquo;
sumgrimod += grimod;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u Gries signed divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
printf("...for %u Gries signed modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgrimod);
/*
* Do a timing run for signed loop overhead only.
*/
sumgriquo = 0;
SEED_INT_RAN(bintim)
printf("Performing %u signed loop overhead operations...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
INT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
sumgriquo += a_dividend;
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
sumgriquo += a_divisor;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u signed loop overhead operations: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
/*
* Pre-select some unsigned divisors. Get their inverse and right shift values.
* Ensure as many unique divisor values as possible.
*/
mindvr = uabove;
maxdvr = 0;
printf("Selecting %u unsigned divisors from %Lu unique values...\n",
NDIVISORS, nunique);
if (nunique <= NDIVISORS)
{
a_divisor = ubelow + 1;
for (k=0; k<NDIVISORS; k++)
{
while (!uint_invert(
(UNSIGNED_DVR_TYPE)a_divisor,
sizeof(inverse[k]), &inverse[k],
sizeof(rtshift[k]), &rtshift[k]))
{
a_divisor++;
if ((a_divisor > uabove) || !a_divisor) a_divisor = ubelow + 1;
}
divisor[k] = a_divisor;
if ((UNSIGNED_DVR_TYPE)a_divisor < (UNSIGNED_DVR_TYPE)mindvr)
mindvr = a_divisor;
if ((UNSIGNED_DVR_TYPE)a_divisor > (UNSIGNED_DVR_TYPE)maxdvr)
maxdvr = a_divisor;
a_divisor++;
if ((a_divisor > uabove) || !a_divisor) a_divisor = ubelow + 1;
}
}
else
{
SEED_INT_RAN(bintim)
for (k=0; k<NDIVISORS; k++)
{
do UINT_RAN(a_divisor,k,mindvrbits,maxdvrbits)
while (!uint_invert(
(UNSIGNED_DVR_TYPE)a_divisor,
sizeof(inverse[k]), &inverse[k],
sizeof(rtshift[k]), &rtshift[k]));
i = 0;
while (i < k)
{
if (divisor[i] == a_divisor)
{
do a_divisor++;
while (!uint_invert(
(UNSIGNED_DVR_TYPE)a_divisor,
sizeof(inverse[k]), &inverse[k],
sizeof(rtshift[k]), &rtshift[k]));
i = 0;
}
else
i++;
}
divisor[k] = a_divisor;
if ((UNSIGNED_DVR_TYPE)a_divisor < (UNSIGNED_DVR_TYPE)mindvr)
mindvr = a_divisor;
if ((UNSIGNED_DVR_TYPE)a_divisor > (UNSIGNED_DVR_TYPE)maxdvr)
maxdvr = a_divisor;
}
}
printf("Minimum unsigned divisor %016LX (%Lu).\n",
(uint64)(UNSIGNED_DVR_TYPE)mindvr,
(uint64)(UNSIGNED_DVR_TYPE)mindvr);
printf("Maximum unsigned divisor %016LX (%Lu).\n",
(uint64)(UNSIGNED_DVR_TYPE)maxdvr,
(uint64)(UNSIGNED_DVR_TYPE)maxdvr);
/*
* Initialize the histogram arrays.
*/
for (i = 0; i < MAXHST; i++) poshst[i] = neghst[i] = 0;
for (i = 0; i < NDIVISORS; i++) posdiv[i] = negdiv[i] = 0;
/*
* Perform unsigned divides, both in the compiler-selected way,
* and in the Robert Gries way. Compare quotient and modulus values.
*/
nbadquo = nbadmod = 0;
sumdifquo = sumdifmod = 0;
sumcmpquo = sumcmpmod = 0;
sumgriquo = sumgrimod = 0;
mindvd = max_uint_value(sizeof(UNSIGNED_DVD_TYPE), 0);
maxdvd = 0;
SEED_INT_RAN(bintim)
printf("Performing %u unsigned divides both ways...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
UINT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
if ((UNSIGNED_DVD_TYPE)a_dividend < (UNSIGNED_DVD_TYPE)mindvd)
mindvd = a_dividend;
if ((UNSIGNED_DVD_TYPE)a_dividend > (UNSIGNED_DVD_TYPE)maxdvd)
maxdvd = a_dividend;
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
cmpquo = (UNSIGNED_QUO_TYPE)
((UNSIGNED_DVD_TYPE)a_dividend /
(UNSIGNED_DVR_TYPE)a_divisor);
cmpmod = (UNSIGNED_MOD_TYPE)
((UNSIGNED_DVD_TYPE)a_dividend %
(UNSIGNED_DVR_TYPE)a_divisor);
UINT_DIV( (UNSIGNED_DVD_TYPE)a_dividend,
(UNSIGNED_DVR_TYPE)a_divisor,
inverse[k],rtshift[k],griquo)
INT_MOD( (UNSIGNED_DVD_TYPE)a_dividend,
(UNSIGNED_DVR_TYPE)a_divisor,
(UNSIGNED_QUO_TYPE)griquo,grimod)
sumcmpquo += cmpquo;
sumcmpmod += cmpmod;
sumgriquo += griquo;
sumgrimod += grimod;
difquo = ((UNSIGNED_QUO_TYPE)griquo - (UNSIGNED_QUO_TYPE)cmpquo);
difmod = ((UNSIGNED_QUO_TYPE)grimod - (UNSIGNED_QUO_TYPE)cmpmod);
sumdifquo += difquo;
sumdifmod += difmod;
if (difquo || difmod)
{
if (difquo) nbadquo++;
if (difmod) nbadmod++;
printf("\n");
printf("Dividend #%4d = %016LX (%Lu)\n", j+1,
(uint64)(UNSIGNED_DVD_TYPE)a_dividend,
(uint64)(UNSIGNED_DVD_TYPE)a_dividend);
printf("Divisor #%4d = %016LX (%Lu)\n", k+1,
(uint64)(UNSIGNED_DVR_TYPE)a_divisor,
(uint64)(UNSIGNED_DVR_TYPE)a_divisor);
printf("Inverse = %016LX (%Lu)\n",
(uint64)inverse[k], (uint64)inverse[k]);
printf("UMULH and MULQ = %016LX %016LX\n",
(uint64)asm("umulh %a0, %a1, %v0",
(uint64)(UNSIGNED_DVD_TYPE)a_dividend,
(uint64)inverse[k] << (8*(sizeof(uint64)-sizeof(inverse[k])))),
(uint64)(UNSIGNED_DVD_TYPE)a_dividend * (uint64)inverse[k]);
printf("Right Shift = %+d\n", (int32)rtshift[k]);
printf("Compiler Quo. = %016LX (%Lu)\n",
(uint64)(UNSIGNED_QUO_TYPE)cmpquo,
(uint64)(UNSIGNED_QUO_TYPE)cmpquo);
printf("Compiler Mod. = %016LX (%Lu)\n",
(uint64)(UNSIGNED_MOD_TYPE)cmpmod,
(uint64)(UNSIGNED_MOD_TYPE)cmpmod);
printf("Gries Quotient = %016LX (%Lu)\n",
(uint64)(UNSIGNED_QUO_TYPE)griquo,
(uint64)(UNSIGNED_QUO_TYPE)griquo);
printf("Gries Modulus = %016LX (%Lu)\n",
(uint64)(UNSIGNED_MOD_TYPE)grimod,
(uint64)(UNSIGNED_MOD_TYPE)grimod);
}
if (difquo <= -MAXHST)
neghst[MAXHST-1]++;
else if (difquo >= MAXHST)
poshst[MAXHST-1]++;
else if (difquo < 0)
neghst[-difquo]++;
else
poshst[difquo]++;
if (difquo < 0)
negdiv[k]++;
else if (difquo > 0)
posdiv[k]++;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u compiler unsigned divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpquo);
printf("...for %u compiler unsigned modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpmod);
printf("...for %u Gries unsigned divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
printf("...for %u Gries unsigned modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgrimod);
printf("Minimum unsigned dividend %016LX (%Lu).\n",
(uint64)(UNSIGNED_DVD_TYPE)mindvd,
(uint64)(UNSIGNED_DVD_TYPE)mindvd);
printf("Maximum unsigned dividend %016LX (%Lu).\n",
(uint64)(UNSIGNED_DVD_TYPE)maxdvd,
(uint64)(UNSIGNED_DVD_TYPE)maxdvd);
if (nbadquo || nbadmod)
{
printf("Sum of quotient differences = %+Ld; %u incorrect quotients.\n",
sumdifquo, nbadquo);
printf("Sum of modulus differences = %+Ld; %u incorrect modulii.\n",
sumdifmod, nbadmod);
printf("Histogram of quotient differences:\n");
for (i = MAXHST-1; i > 0; i--)
if (neghst[i])
printf("%+02d: %u\n", -i, neghst[i]);
for (i = 0; i < MAXHST; i++)
if (poshst[i])
printf("%+02d: %u\n", i, poshst[i]);
for (i = 0; i < NDIVISORS; i++)
if (negdiv[i] || posdiv[i])
printf("divisor #%d: %u smaller, %u larger than correct quotient.\n",
i+1, negdiv[i], posdiv[i]);
}
/*
* Do a timing run for unsigned compiler divides only.
*/
sumcmpquo = 0;
SEED_INT_RAN(bintim)
printf("Performing %u compiler unsigned divides...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
UINT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
cmpquo = (UNSIGNED_QUO_TYPE)
((UNSIGNED_DVD_TYPE)a_dividend /
(UNSIGNED_DVR_TYPE)a_divisor);
sumcmpquo += cmpquo;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u compiler unsigned divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpquo);
/*
* Do a timing run for unsigned compiler quotient and modulus.
*/
sumcmpquo = 0;
sumcmpmod = 0;
SEED_INT_RAN(bintim)
printf("Performing %u compiler unsigned quotient and modulus...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
UINT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
cmpquo = (UNSIGNED_QUO_TYPE)
((UNSIGNED_DVD_TYPE)a_dividend /
(UNSIGNED_DVR_TYPE)a_divisor);
cmpmod = (UNSIGNED_MOD_TYPE)
((UNSIGNED_DVD_TYPE)a_dividend %
(UNSIGNED_DVR_TYPE)a_divisor);
sumcmpquo += cmpquo;
sumcmpmod += cmpmod;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u compiler unsigned divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpquo);
printf("...for %u compiler unsigned modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumcmpmod);
/*
* Do a timing run for unsigned Gries divides only.
*/
sumgriquo = 0;
SEED_INT_RAN(bintim)
printf("Performing %u Gries unsigned divides...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
UINT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
UINT_DIV( (UNSIGNED_DVD_TYPE)a_dividend,
(UNSIGNED_DVR_TYPE)a_divisor,
inverse[k],rtshift[k],griquo)
sumgriquo += griquo;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u Gries unsigned divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
/*
* Do a timing run for unsigned Gries quotient and modulus.
*/
sumgriquo = 0;
sumgrimod = 0;
SEED_INT_RAN(bintim)
printf("Performing %u Gries unsigned quotient and modulus...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
UINT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
UINT_DIV( (UNSIGNED_DVD_TYPE)a_dividend,
(UNSIGNED_DVR_TYPE)a_divisor,
inverse[k],rtshift[k],griquo)
INT_MOD( (UNSIGNED_DVD_TYPE)a_dividend,
(UNSIGNED_DVR_TYPE)a_divisor,
(UNSIGNED_QUO_TYPE)griquo,grimod)
sumgriquo += griquo;
sumgrimod += grimod;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u Gries unsigned divides: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
printf("...for %u Gries unsigned modulii: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgrimod);
/*
* Do a timing run for unsigned loop overhead only.
*/
sumgriquo = 0;
SEED_INT_RAN(bintim)
printf("Performing %u unsigned loop overhead operations...\n",
NDIVIDENDS*NDIVISORS);
#if __VMS
lib$init_timer(0);
#else
sclk = clock();
#endif
for (j=0; j<NDIVIDENDS; j++)
{
UINT_RAN(a_dividend,j,mindvdbits,maxdvdbits)
sumgriquo += a_dividend;
for (k=0; k<NDIVISORS; k++)
{
a_divisor = divisor[k];
sumgriquo += a_divisor;
}
}
#if __VMS
lib$show_timer(0);
#else
fclk = clock();
printf("CPU: %u/%u seconds.\n", fclk-sclk, CLOCKS_PER_SEC);
#endif
printf("...for %u unsigned loop overhead operations: sum = %+Ld.\n",
NDIVIDENDS*NDIVISORS, sumgriquo);
} /* alpha_integer_divide() main_program */
|