diff --git a/tests/performance/Makefile.am b/tests/performance/Makefile.am index 2689e42..24ed791 100644 --- a/tests/performance/Makefile.am +++ b/tests/performance/Makefile.am @@ -1,6 +1,7 @@ UINT_PROGS = add_uperf BUINT128_PROGS = \ add_perf128 \ + mul_perf128 \ div10_perf128 \ div1000_perf128 \ parse_perf128 \ @@ -13,6 +14,7 @@ CLEANFILES = BUINT256_PROGS = \ add_perf256 \ + mul_perf256 \ div10_perf256 \ div1000_perf256 \ parse_perf256 \ @@ -21,6 +23,7 @@ BUINT256_PROGS = \ BUINT512_PROGS = \ add_perf512 \ + mul_perf512 \ div10_perf512 \ div1000_perf512 \ parse_perf512 \ @@ -29,6 +32,7 @@ BUINT512_PROGS = \ BUINTEXTRA_PROGS = \ add_perf@userdef_bits@ \ + mul_perf@userdef_bits@ \ div10_perf@userdef_bits@ \ div1000_perf@userdef_bits@ \ parse_perf@userdef_bits@ \ @@ -40,6 +44,7 @@ if WITH_BIGUINT256 CLEANFILES += perf_common256.h \ add_perf256.c \ + mul_perf256.c \ div10_perf256.c \ div1000_perf256.c \ parse_perf256.c \ @@ -47,6 +52,7 @@ if WITH_BIGUINT256 shift_perf256.c nodist_add_perf256_SOURCES = add_perf256.c + nodist_mul_perf256_SOURCES = mul_perf256.c nodist_div10_perf256_SOURCES = div10_perf256.c nodist_div1000_perf256_SOURCES = div1000_perf256.c nodist_parse_perf256_SOURCES = parse_perf256.c @@ -66,6 +72,7 @@ if WITH_BIGUINT512 CLEANFILES += perf_common512.h \ add_perf512.c \ + mul_perf512.c \ div10_perf512.c \ div1000_perf512.c \ parse_perf512.c \ @@ -73,6 +80,7 @@ if WITH_BIGUINT512 shift_perf512.c nodist_add_perf512_SOURCES = add_perf512.c + nodist_mul_perf512_SOURCES = mul_perf512.c nodist_div10_perf512_SOURCES = div10_perf512.c nodist_div1000_perf512_SOURCES = div1000_perf512.c nodist_parse_perf512_SOURCES = parse_perf512.c @@ -92,6 +100,7 @@ if EXTRA_BITLEN CLEANFILES += perf_common@userdef_bits@.h \ add_perf@userdef_bits@.c \ + mul_perf@userdef_bits@.c \ div10_perf@userdef_bits@.c \ div1000_perf@userdef_bits@.c \ parse_perf@userdef_bits@.c \ @@ -99,6 +108,7 @@ if EXTRA_BITLEN shift_perf@userdef_bits@.c nodist_add_perf@userdef_bits@_SOURCES = add_perf@userdef_bits@.c + nodist_mul_perf@userdef_bits@_SOURCES = mul_perf@userdef_bits@.c nodist_div10_perf@userdef_bits@_SOURCES = div10_perf@userdef_bits@.c nodist_div1000_perf@userdef_bits@_SOURCES = div1000_perf@userdef_bits@.c nodist_parse_perf@userdef_bits@_SOURCES = parse_perf@userdef_bits@.c @@ -114,9 +124,10 @@ if EXTRA_BITLEN endif AM_CPPFLAGS = -I${top_srcdir}/src -I../../src -LDADD = ../../src/libbiguint.a +LDADD = perf_common.o ../../src/libbiguint.a -EXTRA_DIST = perf_common128.h +EXTRA_DIST = perf_common128.h perf_common.h perf_common.c +DISTCLEANFILES = $(DEPDIR)/perf_common.Po BUILT_SOURCES = $(CLEANFILES) diff --git a/tests/performance/add_perf128.c b/tests/performance/add_perf128.c index 55c70a3..9c41964 100644 --- a/tests/performance/add_perf128.c +++ b/tests/performance/add_perf128.c @@ -1,129 +1,96 @@ -#include "biguint128.h" #include #include #include #include -#define LOOPS (1<<26) // 64M loops -#define BUFLEN 40 // for printsceen debugging +#include "biguint128.h" +#include "perf_common.h" +#include "perf_common128.h" -static void print_result(clock_t t_begin, clock_t t_end, const char *op, int cnt) { - clock_t dt = t_end - t_begin; - fprintf(stdout, "=== %d BigUInt128 %s operations ===\n", cnt, op); - fprintf(stdout, "Elapsed time: %ld us,\t%.1f op/s\n", dt, (1000000.0 * cnt) / dt); -} -int main() { - BigUInt128 b0 = biguint128_ctor_default(); - BigUInt128 b1 = biguint128_ctor_unit(); - uint32_t loop_cnt; - clock_t t0, t1; - char buf[BUFLEN]; +// ### Constraints and default values +#define MAX_LEVELS 8U +#define DEFAULT_LEVELS 3U +#define MAX_LOOPS (1<<28) // 256M loops +#define DEFAULT_LOOPS (1<<26) // 64M loops +#define BUFLEN 40 // for full function names +#define INC_A 37U +#define INC_B 29U - // #1: conventional add - t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - b0 = biguint128_add(&b0, &b1); - } - t1 = clock(); - print_result(t0, t1, "add", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); +// ### Local types +typedef enum { + FUN_ADD = 0, + FUN_ADD_ASGN, + FUN_ADD_REPL, + FUN_ADD_TINY, + FUN_SUB, + FUN_SUB_ASGN, + FUN_SUB_REPL, + FUN_SUB_TINY +} AdditiveFun; - // #2: add-assignment - t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_add_assign(&b0, &b1); - } - t1 = clock(); - print_result(t0, t1, "add-assignment", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); +// ### Constants +const char *funname[]={ + "add", + "add_assign", + "add_replace", + "add_tiny", + "sub", + "sub_assign", + "sub_replace", + "sub_tiny" +}; +const unsigned int fun_n = sizeof(funname) / sizeof(funname[0]); - // #2: add-replace - t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_add_replace(&b0, &b0, &b1); - } - t1 = clock(); - print_result(t0, t1, "add-replace", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); +const StandardArgs ARGS_DEFAULT = { + DEFAULT_LOOPS, false, false, + DEFAULT_LEVELS, -1, -1, + INC_A, INC_B, + -1, 0 +}; - // #3: add-assign uint as biguint - { - t0 = clock(); - BigUInt128 b2 = biguint128_ctor_standard(&b1.dat[0]); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_add_assign(&b0, &b2); - } - t1 = clock(); - print_result(t0, t1, "add-assign uint", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); - } - // #4: add-tiny - t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_add_tiny(&b0, b1.dat[0]); - } - t1 = clock(); - print_result(t0, t1, "add-tiny", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); +// ### Internal functions +static void exec_function_loop_(unsigned int ai, unsigned int bi, unsigned int fun, const StandardArgs *args) { + BigUInt128 a = get_value_by_level(ai, args->levels); + BigUInt128 b = get_value_by_level(bi, args->levels); + BigUInt128 chkval = biguint128_ctor_default(); + BigUInt128 res; + BigUInt128 *procref = (fun&1)?&a:&res; // note, every second function is an assignment operation + clock_t t0, t1; + char fnamebuf[BUFLEN]; - // #5: sub-tiny t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_sub_tiny(&b0, b1.dat[0]); - } - t1 = clock(); - print_result(t0, t1, "sub-tiny", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); - - // #6: sub-assign uint as biguint - { - t0 = clock(); - BigUInt128 b2 = biguint128_ctor_standard(&b1.dat[0]); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_sub_assign(&b0, &b2); + for (unsigned int i = 0; i < args->loops; ++i) { + if (!(fun & 4)) { // lower 4 functions + if (fun == FUN_ADD) { + res = biguint128_add(&a, &b); + } else if (fun == FUN_ADD_ASGN) { + biguint128_add_assign(&a, &b); + } else if (fun == FUN_ADD_TINY) { + biguint128_add_tiny(&a, b.dat[0]); + } else if (fun == FUN_ADD_REPL) { + biguint128_add_replace(&res, &a, &b); + } + } else { // upper 4 functions + if (fun == FUN_SUB) { + res = biguint128_sub(&a, &b); + } else if (fun == FUN_SUB_ASGN) { + biguint128_sub_assign(&a, &b); + } else if (fun == FUN_SUB_TINY) { + biguint128_sub_tiny(&a, b.dat[0]); + } else if (fun == FUN_SUB_REPL) { + biguint128_sub_replace(&res, &a, &b); + } } - t1 = clock(); - print_result(t0, t1, "sub-assign uint", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); - } - - // #7: sub-replace - t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_sub_replace(&b0, &b0, &b1); + process_result_v1(procref, &chkval.dat[0]); + inc_operands_v1(&a, &b, args->diff_a, args->diff_b); } t1 = clock(); - print_result(t0, t1, "sub-replace", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); - - // #8: sub-assignment - t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - biguint128_sub_assign(&b0, &b1); - } - t1 = clock(); - print_result(t0, t1, "sub-assignment", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); - - // #9: conventional sub - t0 = clock(); - for (loop_cnt = 0; loop_cnt < LOOPS; ++loop_cnt) { - b0 = biguint128_sub(&b0, &b1); - } - t1 = clock(); - print_result(t0, t1, "sub", LOOPS); - buf[biguint128_print_dec(&b0, buf, BUFLEN)] = 0; - fprintf(stdout, "(current sum: %s)\n", buf); + snprintf(fnamebuf, BUFLEN, "%s + 2*add_tiny(C)", funname[fun]); + print_exec_summary(t0, t1, fnamebuf, args->loops, &chkval, 1); +} - return 0; +// ### Main function +int main(int argc, const char *argv[]) { + return fun2_main(argc, argv, 128, ARGS_DEFAULT, MAX_LEVELS, MAX_LOOPS, fun_n, funname, &exec_function_loop_); } diff --git a/tests/performance/mul_perf128.c b/tests/performance/mul_perf128.c new file mode 100644 index 0000000..5eaf2cb --- /dev/null +++ b/tests/performance/mul_perf128.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include + +#include "biguint128.h" +#include "perf_common.h" +#include "perf_common128.h" + + +// ### Constraints and default values +#define MAX_LEVELS 8U +#define DEFAULT_LEVELS 3U +#define MAX_LOOPS (1<<26) // 64M loops +#define DEFAULT_LOOPS (1<<20) // 1M loops +#define BUFLEN 40 // for full function names +#define INC_A 37U +#define INC_B 29U + +// ### Local types +typedef enum { + FUN_MUL = 0, + FUN_DMUL, + FUN_DIV +} MultiplicativeFun; + +// ### Constants +const char *funname[]={ + "mul", + "dmul", + "div" +}; +const unsigned int fun_n = sizeof(funname) / sizeof(funname[0]); + +const StandardArgs ARGS_DEFAULT = { + DEFAULT_LOOPS, false, false, + DEFAULT_LEVELS, -1, -1, + INC_A, INC_B, + -1, 0 +}; + + +// ### Internal functions +static void exec_function_loop_(unsigned int ai, unsigned int bi, unsigned int fun, const StandardArgs *args) { + BigUInt128 a = get_value_by_level(ai, args->levels); + BigUInt128 b = get_value_by_level(bi, args->levels); + BigUInt128 chkval = biguint128_ctor_default(); + BigUInt128 res; + BigUIntPair128 resp; + BigUInt128 *procref1 = fun==FUN_MUL?&res:&resp.first; + BigUInt128 *procref2 = fun==FUN_MUL?&res:&resp.second; + clock_t t0, t1; + char fnamebuf[BUFLEN]; + + t0 = clock(); + for (unsigned int i = 0; i < args->loops; ++i) { + if (fun == FUN_MUL) { + res = biguint128_mul(&a, &b); + } else if (fun == FUN_DMUL) { + resp = biguint128_dmul(&a, &b); + } else if (fun == FUN_DIV) { + resp = biguint128_div(&a, &b); + } + process_result_v1(procref2, &chkval.dat[0]); + process_result_v1(procref1, &chkval.dat[0]); + inc_operands_v1(&a, &b, (UInt) args->diff_a, (UInt) args->diff_b); + } + t1 = clock(); + snprintf(fnamebuf, BUFLEN, "%s + 2*add_tiny(C)", funname[fun]); + print_exec_summary(t0, t1, fnamebuf, args->loops, &chkval, 1); +} + +// ### Main function +int main(int argc, const char *argv[]) { + return fun2_main(argc, argv, 128, ARGS_DEFAULT, MAX_LEVELS, MAX_LOOPS, fun_n, funname, &exec_function_loop_); +} diff --git a/tests/performance/perf_common.c b/tests/performance/perf_common.c new file mode 100644 index 0000000..7be1eda --- /dev/null +++ b/tests/performance/perf_common.c @@ -0,0 +1,136 @@ +#include +#include + +#include "perf_common.h" + +StandardArgs parse_args(int argc, const char *argv[], const StandardArgs res_init) { + StandardArgs retv = res_init; + int argi = 1; + while (argi < argc) { + if (argv[argi][0]!='-') { + retv.error = true; + return retv; + } + int val = 0; + int arginc = 1; + char c1 = argv[argi][1]; + char c2 = c1?argv[argi][2]:c1; + char c3 = c2?argv[argi][3]:c2; + if (c1 != 'h') { // option -h is exceptional + val = argi+1]", OPTSEP); + if (argmask&ARGMASK_LEVELS) fprintf(stderr, "%s[-l ]", OPTSEP); + if (argmask&ARGMASK_LMASKA) fprintf(stderr, "%s[-la ]", OPTSEP); + if (argmask&ARGMASK_LMASKB) fprintf(stderr, "%s[-lb ]", OPTSEP); + if (argmask&ARGMASK_DIFFA) fprintf(stderr, "%s[-da ]", OPTSEP); + if (argmask&ARGMASK_DIFFB) fprintf(stderr, "%s[-db ]", OPTSEP); + if (argmask&ARGMASK_FMASK) fprintf(stderr, "%s[-f ]", OPTSEP); + if (argmask&ARGMASK_FEXMASK) fprintf(stderr, "%s[-F ]", OPTSEP); + fprintf(stderr, "%s[-h]\n", OPTSEP); +} + +void print_help_all(const char *prgname, unsigned int bits, unsigned int argmask, unsigned int fun_n, const char *funname[]) { + print_help(prgname, argmask); + fprintf(stderr, "where function check bitmask bits mean\n"); + for (unsigned int i=0; i + +typedef struct { + unsigned int loops; + bool error; + bool help; + unsigned char levels; + unsigned int lmask_a; + unsigned int lmask_b; + int diff_a; + int diff_b; + unsigned int fmask; + unsigned int fexmask; +} StandardArgs; + +#define ARGMASK_LOOPS 1U +#define ARGMASK_LEVELS 2U +#define ARGMASK_LMASKA 4U +#define ARGMASK_LMASKB 8U +#define ARGMASK_DIFFA 16U +#define ARGMASK_DIFFB 32U +#define ARGMASK_FMASK 64U +#define ARGMASK_FEXMASK 128U +#define ARGMASK_ALL 255U + +StandardArgs parse_args(int argc, const char *argv[], const StandardArgs res_init); +void print_help(const char *prgname, unsigned int argmask); +void print_help_all(const char *prgname, unsigned int bits, unsigned int argmask, unsigned int fun_n, const char *funname[]); +int fun2_main(int argc, const char *argv[], + unsigned int bits, const StandardArgs args_init, + unsigned int max_levels, unsigned int max_loops, + unsigned int fun_n, const char *funname[], + void (*internal_loop)(unsigned int ai, unsigned int bi, unsigned int funidx, const StandardArgs *args)); + +#endif /* PERF_COMMON_H */ + diff --git a/tests/performance/perf_common128.h b/tests/performance/perf_common128.h index 95b631f..56668d8 100644 --- a/tests/performance/perf_common128.h +++ b/tests/performance/perf_common128.h @@ -18,7 +18,7 @@ typedef struct { static inline void print_exec_time(clock_t t_begin, clock_t t_end, const char *op, int cnt) { clock_t dt = t_end - t_begin; fprintf(stdout, "=== %d BigUInt128 %s operations ===\n", cnt, op); - fprintf(stdout, "Elapsed time: %ld us,\t%.1f op/s\n", dt, (1000000.0 * cnt) / dt); + fprintf(stdout, " Elapsed time: %ld us,\t%.1f op/s\n", dt, (1000000.0 * cnt) / dt); } static inline void print_exec_summary(clock_t t_begin, clock_t t_end, const char *op, int cnt, const BigUInt128 *val, int valnum) { @@ -26,7 +26,7 @@ static inline void print_exec_summary(clock_t t_begin, clock_t t_end, const char print_exec_time(t_begin, t_end, op, cnt); for (int i=0; idat[idx]; + if (++idx == BIGUINT128_CELLS) idx = 0; +} + #undef UINT_BITS #undef BIGUINT_BITS #undef DEC_BIGUINTLEN