Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fast multiplication #11

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ all:
@$(CC) $(CFLAGS) bn.c ./tests/factorial.c -o ./build/test_factorial
@$(CC) $(CFLAGS) bn.c ./tests/randomized.c -o ./build/test_random
@#$(CC) $(CFLAGS) bn.c ./tests/rsa.c -o ./build/test_rsa
@$(CC) $(CFLAGS) bn.c ./tests/mul_test.c -o ./build/test_mul


test:
Expand Down
39 changes: 39 additions & 0 deletions bn.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,45 @@ void bignum_mul(struct bn* a, struct bn* b, struct bn* c)
}


void bignum_mul_alt(struct bn *a, struct bn *b, struct bn *c)
{
require(a, "a is null");
require(b, "b is null");
require(c, "c is null");

bignum_init(c);
DTYPE_TMP tmp = 0;
DTYPE tmp_to_add = 0;

int usable_len = BN_ARRAY_SIZE;

/* this section speads up algorithm by "cutting" len of bignum*/
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't want to pull this part. Let me first comment it, and then tell you the reason why I would like to avoid it,

I appreciate the effort and understand why you would want this here (and in every other functions. You could also maintain a bitset of highest used bit to optimize for speed.
However it adds to the size of both source- and object code. I also fear you might miss out on vectorization optimizations from the compiler, but I wouldn't know for sure. It also makes you susceptible to leaking info via timing if used in encryption algorithms. I mostly want to take a raincheck because it also breaks the beautiful simplicity of the naïive algorithm.

Thanks for the PR - I really appreciate the effort you've put into this

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Vector operations bases on processor words so don't worry.
I understand the timing problem in cryptography.
I am not sure are you talking about the same peace of code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the section with finding actual length of number can be removed.

for (int i = BN_ARRAY_SIZE - 1; i >= 0; --i)
{
if (a->array[i] != 0 || b->array[i] != 0)
{
usable_len = 2 * (i + 1);
break;
}
}

usable_len = usable_len > BN_ARRAY_SIZE ? BN_ARRAY_SIZE : usable_len;
//

for (int i = 0; i < usable_len; ++i)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part is brilliant. I love how you've reduced the allocation of two bignum's into none. That really cuts straight to the bone 👍

However I would like to avoid the scoped-declarations of variables inside the for-loop for stylistic reasons and to be a bit more portable against shitty/subset compilers. I will pull it as-is and then edit it myself, no worries.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Declarations in for loops are provided by C standard, so if compiler does not support the standard it is not a compiler but shit. It is better style to use temporary variables like iterators in scopes. But if you want to keep same style it's ok for me.
The main improvement is reducing time complexity.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old multiplication is O(n^3). New is only O(n^2).

{
c->array[i] += tmp_to_add;
tmp_to_add = tmp = 0;
for (int j = 0, k = i; j < i + 1 && j < usable_len; ++j, --k)
{
tmp = (DTYPE_TMP)a->array[j] * (DTYPE_TMP)b->array[k];
tmp_to_add += tmp >> 32;
c->array[i] += tmp;
}
}
}


void bignum_div(struct bn* a, struct bn* b, struct bn* c)
{
require(a, "a is null");
Expand Down
6 changes: 4 additions & 2 deletions bn.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ There may well be room for performance-optimizations and improvements.
#define WORD_SIZE 4
#endif

/* Size of big-numbers in bytes */
#define BN_ARRAY_SIZE (128 / WORD_SIZE)
/* Size of big-numbers in WORDS */ // because you dividing by WORD size
// #define BN_ARRAY_SIZE (128 / WORD_SIZE)
#define BN_ARRAY_SIZE 1024


/* Here comes the compile-time specialization for how large the underlying array size should be. */
Expand Down Expand Up @@ -97,6 +98,7 @@ void bignum_to_string(struct bn* n, char* str, int maxsize);
void bignum_add(struct bn* a, struct bn* b, struct bn* c); /* c = a + b */
void bignum_sub(struct bn* a, struct bn* b, struct bn* c); /* c = a - b */
void bignum_mul(struct bn* a, struct bn* b, struct bn* c); /* c = a * b */
void bignum_mul_alt(struct bn* a, struct bn* b, struct bn* c); /* c = a * b alternative, much faster method*/
void bignum_div(struct bn* a, struct bn* b, struct bn* c); /* c = a / b */
void bignum_mod(struct bn* a, struct bn* b, struct bn* c); /* c = a % b */
void bignum_divmod(struct bn* a, struct bn* b, struct bn* c, struct bn* d); /* c = a/b, d = a%b */
Expand Down
107 changes: 107 additions & 0 deletions tests/mul_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#include <stdio.h>
#include <time.h>
#include <stdlib.h>

#include "bn.h"

int mul_get_clocks(struct bn *a, struct bn *b, struct bn *c)
{
int clocks = clock();
bignum_mul(a, b, c);
return clock() - clocks;
}

int mul_alt_get_clocks(struct bn *a, struct bn *b, struct bn *c)
{
int clocks = clock();
bignum_mul_alt(a, b, c);
return clock() - clocks;
}

int int_test()
{
struct bn a, b, c;
int clocks = 0,
clocks_alt = 0,
num1 = 0,
num2 = 0;
char res[17000] = {0},
res_alt[17000] = {0};
bignum_init(&a);
bignum_init(&b);
bignum_init(&c);
printf("method\tnum1\tnum2\tresult\tclocks\n");

for (int i = 0; i < 10; ++i)
{
num1 = rand();
num2 = rand();

bignum_from_int(&a, num1);
bignum_from_int(&b, num2);

clocks = mul_get_clocks(&a, &b, &c);
bignum_to_string(&c, res, 17000);

clocks_alt = mul_alt_get_clocks(&a, &b, &c);
bignum_to_string(&c, res_alt, 17000);

printf("normal\t%d\t%d\t%s\t%d\n", num1, num2, res, clocks);
printf("alter\t%d\t%d\t%s\t%d\n", num1, num2, res_alt, clocks_alt);
printf("\n");
}

return 0;
}

int bigger_then_int_test()
{
struct bn a, b, c;
int clocks = 0,
clocks_alt = 0;
char res[17000] = {0},
res_alt[17000] = {0},
num1[17000] = {0},
num2[17000] = {0};

bignum_init(&a);
bignum_init(&b);
bignum_init(&c);
printf("method\tnum1\tnum2\tresult\tclocks\n");

for (int i = 0; i < 10; ++i)
{
a.array[0] = rand();
a.array[1] = rand();
a.array[2] = rand();
a.array[3] = rand();

b.array[0] = rand();
b.array[1] = rand();
b.array[2] = rand();
b.array[3] = rand();

bignum_to_string(&a, num1, 17000);
bignum_to_string(&b, num2, 17000);

clocks = mul_get_clocks(&a, &b, &c);
bignum_to_string(&c, res, 17000);

clocks_alt = mul_alt_get_clocks(&a, &b, &c);
bignum_to_string(&c, res_alt, 17000);

printf("normal\t%s\t%s\t%s\t%d\n", num1, num2, res, clocks);
printf("alter\t%s\t%s\t%s\t%d\n", num1, num2, res_alt, clocks_alt);
printf("\n");
}

return 0;
}

int main()
{
srand(time(NULL));
int_test();
bigger_then_int_test();
return 0;
}