-
Notifications
You must be signed in to change notification settings - Fork 89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fast multiplication #11
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -273,6 +273,45 @@ void bignum_mul(struct bn* a, struct bn* b, struct bn* c) | |
} | ||
|
||
|
||
void bignum_mul_alt(struct bn *a, struct bn *b, struct bn *c) | ||
{ | ||
require(a, "a is null"); | ||
require(b, "b is null"); | ||
require(c, "c is null"); | ||
|
||
bignum_init(c); | ||
DTYPE_TMP tmp = 0; | ||
DTYPE tmp_to_add = 0; | ||
|
||
int usable_len = BN_ARRAY_SIZE; | ||
|
||
/* this section speads up algorithm by "cutting" len of bignum*/ | ||
for (int i = BN_ARRAY_SIZE - 1; i >= 0; --i) | ||
{ | ||
if (a->array[i] != 0 || b->array[i] != 0) | ||
{ | ||
usable_len = 2 * (i + 1); | ||
break; | ||
} | ||
} | ||
|
||
usable_len = usable_len > BN_ARRAY_SIZE ? BN_ARRAY_SIZE : usable_len; | ||
// | ||
|
||
for (int i = 0; i < usable_len; ++i) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This part is brilliant. I love how you've reduced the allocation of two bignum's into none. That really cuts straight to the bone 👍 However I would like to avoid the scoped-declarations of variables inside the for-loop for stylistic reasons and to be a bit more portable against shitty/subset compilers. I will pull it as-is and then edit it myself, no worries. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Declarations in for loops are provided by C standard, so if compiler does not support the standard it is not a compiler but shit. It is better style to use temporary variables like iterators in scopes. But if you want to keep same style it's ok for me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The old multiplication is O(n^3). New is only O(n^2). |
||
{ | ||
c->array[i] += tmp_to_add; | ||
tmp_to_add = tmp = 0; | ||
for (int j = 0, k = i; j < i + 1 && j < usable_len; ++j, --k) | ||
{ | ||
tmp = (DTYPE_TMP)a->array[j] * (DTYPE_TMP)b->array[k]; | ||
tmp_to_add += tmp >> 32; | ||
c->array[i] += tmp; | ||
} | ||
} | ||
} | ||
|
||
|
||
void bignum_div(struct bn* a, struct bn* b, struct bn* c) | ||
{ | ||
require(a, "a is null"); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#include <stdio.h> | ||
#include <time.h> | ||
#include <stdlib.h> | ||
|
||
#include "bn.h" | ||
|
||
/*
 * Time one call to bignum_mul(a, b, c).
 *
 * Returns the processor time consumed by the call as reported by clock(),
 * i.e. in clock ticks. The start value is kept in clock_t so that only the
 * (small) difference is narrowed to int, not the absolute clock reading.
 */
int mul_get_clocks(struct bn *a, struct bn *b, struct bn *c)
{
  clock_t started = clock();
  bignum_mul(a, b, c);
  return (int)(clock() - started);
}
|
||
/*
 * Time one call to bignum_mul_alt(a, b, c).
 *
 * Returns the processor time consumed by the call as reported by clock(),
 * i.e. in clock ticks. The start value is kept in clock_t so that only the
 * (small) difference is narrowed to int, not the absolute clock reading.
 */
int mul_alt_get_clocks(struct bn *a, struct bn *b, struct bn *c)
{
  clock_t started = clock();
  bignum_mul_alt(a, b, c);
  return (int)(clock() - started);
}
|
||
int int_test() | ||
{ | ||
struct bn a, b, c; | ||
int clocks = 0, | ||
clocks_alt = 0, | ||
num1 = 0, | ||
num2 = 0; | ||
char res[17000] = {0}, | ||
res_alt[17000] = {0}; | ||
bignum_init(&a); | ||
bignum_init(&b); | ||
bignum_init(&c); | ||
printf("method\tnum1\tnum2\tresult\tclocks\n"); | ||
|
||
for (int i = 0; i < 10; ++i) | ||
{ | ||
num1 = rand(); | ||
num2 = rand(); | ||
|
||
bignum_from_int(&a, num1); | ||
bignum_from_int(&b, num2); | ||
|
||
clocks = mul_get_clocks(&a, &b, &c); | ||
bignum_to_string(&c, res, 17000); | ||
|
||
clocks_alt = mul_alt_get_clocks(&a, &b, &c); | ||
bignum_to_string(&c, res_alt, 17000); | ||
|
||
printf("normal\t%d\t%d\t%s\t%d\n", num1, num2, res, clocks); | ||
printf("alter\t%d\t%d\t%s\t%d\n", num1, num2, res_alt, clocks_alt); | ||
printf("\n"); | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
int bigger_then_int_test() | ||
{ | ||
struct bn a, b, c; | ||
int clocks = 0, | ||
clocks_alt = 0; | ||
char res[17000] = {0}, | ||
res_alt[17000] = {0}, | ||
num1[17000] = {0}, | ||
num2[17000] = {0}; | ||
|
||
bignum_init(&a); | ||
bignum_init(&b); | ||
bignum_init(&c); | ||
printf("method\tnum1\tnum2\tresult\tclocks\n"); | ||
|
||
for (int i = 0; i < 10; ++i) | ||
{ | ||
a.array[0] = rand(); | ||
a.array[1] = rand(); | ||
a.array[2] = rand(); | ||
a.array[3] = rand(); | ||
|
||
b.array[0] = rand(); | ||
b.array[1] = rand(); | ||
b.array[2] = rand(); | ||
b.array[3] = rand(); | ||
|
||
bignum_to_string(&a, num1, 17000); | ||
bignum_to_string(&b, num2, 17000); | ||
|
||
clocks = mul_get_clocks(&a, &b, &c); | ||
bignum_to_string(&c, res, 17000); | ||
|
||
clocks_alt = mul_alt_get_clocks(&a, &b, &c); | ||
bignum_to_string(&c, res_alt, 17000); | ||
|
||
printf("normal\t%s\t%s\t%s\t%d\n", num1, num2, res, clocks); | ||
printf("alter\t%s\t%s\t%s\t%d\n", num1, num2, res_alt, clocks_alt); | ||
printf("\n"); | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
/*
 * Entry point of the multiplication benchmark: seeds rand() from the current
 * time, then runs the int-sized and the multi-limb comparison in that order.
 */
int main()
{
  time_t now = time(NULL);

  srand(now);

  /* Both tests always report 0, so their results are not inspected. */
  int_test();
  bigger_then_int_test();

  return 0;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't want to pull this part. Let me first comment on it, and then tell you the reason why I would like to avoid it.
I appreciate the effort and understand why you would want this here (and in every other function). You could also maintain a bitset of the highest used bit to optimize for speed.
However, it adds to the size of both source and object code. I also fear you might miss out on vectorization optimizations from the compiler, but I wouldn't know for sure. It also makes you susceptible to leaking information via timing if used in encryption algorithms. I mostly want to take a rain check because it also breaks the beautiful simplicity of the naïve algorithm.
Thanks for the PR - I really appreciate the effort you've put into this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Vector operations are based on processor words, so don't worry.
I understand the timing problem in cryptography.
I am not sure whether you are talking about the same piece of code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So the section that finds the actual length of the number can be removed.