-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathg1-tensor.cuh
221 lines (136 loc) · 6.23 KB
/
g1-tensor.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#ifndef G1_TENSOR_CUH
#define G1_TENSOR_CUH
#include <iostream>
#include <iomanip>
#include "bls12-381.cuh"
#include "fr-tensor.cuh"
using namespace std;
typedef blstrs__fp__Fp Fp_t;
const uint G1NumThread = 64;
const uint G1AffineSharedMemorySize = 2 * sizeof(G1Affine_t) * G1NumThread;
const uint G1JacobianSharedMemorySize = 2 * sizeof(G1Jacobian_t) * G1NumThread;
DEVICE Fp_t Fp_minus(Fp_t a);
DEVICE G1Affine_t G1Affine_minus(G1Affine_t a);
DEVICE G1Jacobian_t G1Jacobian_minus(G1Jacobian_t a);
ostream& operator<<(ostream& os, const Fp_t& x);
ostream& operator<<(ostream& os, const G1Affine_t& g);
ostream& operator<<(ostream& os, const G1Jacobian_t& g);
// x_mont = 0x120177419e0bfb75edce6ecc21dbf440f0ae6acdf3d0e747154f95c7143ba1c17817fc679976fff55cb38790fd530c16
const Fp_t G1_generator_x_mont = {
4250078230,
1555269520,
2574712821,
2014837863,
339452353,
357537223,
4090554183,
4037962445,
568063040,
3989728972,
2651585397,
302085953
};
// y_mont = 0xbbc3efc5008a26a0e1c8c3fad0059c051ac582950405194dd595f13570725ce8c22631a7918fd8ebaac93d50ce72271
const Fp_t G1_generator_y_mont = {
216474225,
3131872213,
2031680910,
2351063834,
1460086222,
3713621779,
1346392468,
1370249257,
2902481344,
236751935,
1342743146,
196886268
};
const Fp_t G1_ONE = {196605, 1980301312, 3289120770, 3958636555, 1405573306, 1598593111, 1884444485, 2010011731, 2723605613, 1543969431, 4202751123, 368467651};
const G1Affine_t G1Affine_generator {G1_generator_x_mont, G1_generator_y_mont};
const G1Jacobian_t G1Jacobian_generator {G1_generator_x_mont, G1_generator_y_mont, G1_ONE};
class G1Tensor
{
public:
const uint size;
G1Tensor(uint size);
};
class G1TensorAffine;
class G1TensorJacobian;
class G1TensorAffine: public G1Tensor
{
protected:
G1Affine_t* gpu_data;
public:
G1TensorAffine(const G1TensorAffine&);
G1TensorAffine(uint size);
G1TensorAffine(uint size, const G1Affine_t&);
G1TensorAffine(uint size, const G1Affine_t* cpu_data);
~G1TensorAffine();
G1Affine_t operator()(uint idx) const;
// {
// G1Affine_t out;
// cudaMemcpy(&out, gpu_data + idx, sizeof(G1Affine_t), cudaMemcpyDeviceToHost);
// return out;
// }
G1TensorAffine operator-() const;
G1TensorJacobian& operator*(const FrTensor&);
friend class G1TensorJacobian;
};
class Commitment;
class G1TensorJacobian: public G1Tensor
{
protected:
G1Jacobian_t* gpu_data;
public:
G1TensorJacobian(const G1TensorJacobian&);
G1TensorJacobian(uint size);
G1TensorJacobian(uint size, const G1Jacobian_t&);
G1TensorJacobian(uint size, const G1Jacobian_t* cpu_data);
G1TensorJacobian(const G1TensorAffine& affine_tensor);
~G1TensorJacobian();
G1Jacobian_t operator()(uint) const;
G1TensorJacobian operator-() const;
G1TensorJacobian operator+(const G1TensorJacobian&) const;
G1TensorJacobian operator+(const G1TensorAffine&) const;
G1TensorJacobian operator+(const G1Jacobian_t&) const;
G1TensorJacobian operator+(const G1Affine_t&) const;
G1TensorJacobian& operator+=(const G1TensorJacobian&);
G1TensorJacobian& operator+=(const G1TensorAffine&);
G1TensorJacobian& operator+=(const G1Jacobian_t&);
G1TensorJacobian& operator+=(const G1Affine_t&);
G1TensorJacobian operator-(const G1TensorJacobian&) const;
G1TensorJacobian operator-(const G1TensorAffine&) const;
G1TensorJacobian operator-(const G1Jacobian_t&) const;
G1TensorJacobian operator-(const G1Affine_t&) const;
G1TensorJacobian& operator-=(const G1TensorJacobian&);
G1TensorJacobian& operator-=(const G1TensorAffine&);
G1TensorJacobian& operator-=(const G1Jacobian_t&);
G1TensorJacobian& operator-=(const G1Affine_t&);
G1Jacobian_t sum() const;
G1TensorJacobian operator*(const FrTensor&) const;
G1TensorJacobian& operator*=(const FrTensor&);
G1Jacobian_t operator()(const vector<Fr_t>& u) const;
friend G1Jacobian_t G1_me(const G1TensorJacobian& t, vector<Fr_t>::const_iterator begin, vector<Fr_t>::const_iterator end);
friend class G1TensorAffine;
friend class Commitment;
};
// Implement G1Affine
KERNEL void G1Affine_assign_broadcast(GLOBAL G1Affine_t* arr, GLOBAL G1Affine_t g, uint n);
KERNEL void G1_affine_elementwise_minus(GLOBAL G1Affine_t* arr_in, GLOBAL G1Affine_t* arr_out, uint n);
KERNEL void G1Jacobian_assign_broadcast(GLOBAL G1Jacobian_t* arr, G1Jacobian_t g, uint n);
KERNEL void G1_affine_to_jacobian(GLOBAL G1Affine_t* arr_affine, GLOBAL G1Jacobian_t* arr_jacobian, uint n);
KERNEL void G1_jacobian_elementwise_minus(GLOBAL G1Jacobian_t* arr_in, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_elementwise_add(GLOBAL G1Jacobian_t* arr1, GLOBAL G1Jacobian_t* arr2, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_broadcast_add(GLOBAL G1Jacobian_t* arr, G1Jacobian_t x, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_elementwise_madd(GLOBAL G1Jacobian_t* arr1, GLOBAL G1Affine_t* arr2, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_broadcast_madd(GLOBAL G1Jacobian_t* arr, G1Affine_t x, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_elementwise_sub(GLOBAL G1Jacobian_t* arr1, GLOBAL G1Jacobian_t* arr2, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_broadcast_sub(GLOBAL G1Jacobian_t* arr, G1Jacobian_t x, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_elementwise_msub(GLOBAL G1Jacobian_t* arr1, GLOBAL G1Affine_t* arr2, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_broadcast_msub(GLOBAL G1Jacobian_t* arr, G1Affine_t x, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1Jacobian_sum_reduction(GLOBAL G1Jacobian_t *arr, GLOBAL G1Jacobian_t *output, uint n);
DEVICE G1Jacobian_t G1Jacobian_mul(G1Jacobian_t a, Fr_t x);
KERNEL void G1_jacobian_elementwise_mul(GLOBAL G1Jacobian_t* arr_g1, GLOBAL Fr_t* arr_fr, GLOBAL G1Jacobian_t* arr_out, uint n);
KERNEL void G1_jacobian_elementwise_mul_broadcast(GLOBAL G1Jacobian_t* arr_g1, GLOBAL Fr_t* arr_fr, GLOBAL G1Jacobian_t* arr_out, uint n, uint m);
KERNEL void G1_me_step(GLOBAL G1Jacobian_t *arr_in, GLOBAL G1Jacobian_t *arr_out, Fr_t x, uint in_size, uint out_size);
#endif