-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtmp_par.cu
85 lines (31 loc) · 1.01 KB
/
tmp_par.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include <stdio.h>
// Number of elements in each managed buffer (each buffer holds one scalar).
int n = 1;
// Managed int passed as the kernel's second argument; the kernel does not
// read or write it. Allocated in main().
int *count = NULL;
// Managed accumulator that receives the sum of all integrand samples.
// Allocated and zeroed in main().
double *valor = NULL;
// #include <omp.h>
// #include <stdio.h>
// #include <stdlib.h>
// Launch configuration. The kernel is correct for any 1D grid/block size;
// 256 threads per block keeps every warp full (a multiple of 32).
size_t number_of_blocks = 1000;
size_t threads_per_block = 256;

// Prints a diagnostic to stderr and returns false when a CUDA runtime call
// failed; returns true on cudaSuccess.
static bool cudaOk(cudaError_t err, const char *what){
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Atomically adds val to *address and returns the previous value.
// Native double-precision atomicAdd requires SM60+; older architectures use
// the compare-and-swap loop from the CUDA C++ Programming Guide.
__device__ __forceinline__ double atomicAddDouble(double *address, double val){
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
    unsigned long long int *address_as_ull = (unsigned long long int *)address;
    unsigned long long int old = *address_as_ull;
    unsigned long long int assumed;
    do {
        assumed = old;
        old = atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
    } while (assumed != old);
    return __longlong_as_double(old);
#else
    return atomicAdd(address, val);
#endif
}

// GPUFunction kernel definition.
//
// Accumulates into *valor the sum of 4 / (1 + x_i^2) for every integer i with
// 0 <= i < nCuda, where x_i = (i + 0.5) / nCuda. Multiplying that sum by
// 1 / nCuda on the host gives the midpoint-rule estimate of pi.
//
// Parameters:
//   nCuda - number of integration steps.
//   count - not used by the kernel; kept so the three-argument call shape
//           is preserved.
//   valor - device-accessible accumulator; must be zero before the launch.
//
// Launch layout: 1D grid of 1D blocks of any size (grid-stride loop). Uses no
// shared memory. The final value of *valor is only valid after the host has
// synchronized with the kernel.
__global__ void GPUFunction(double nCuda, int *count, double *valor){
    (void)count;
    // 64-bit indices: the step count is close to INT_MAX, so index + stride
    // would overflow a 32-bit int.
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long totalThreads = (long long)gridDim.x * blockDim.x;
    const double passo = 1.0 / nCuda;
    double parcial = 0.0;
    for (long long idxCuda = first; (double)idxCuda < nCuda; idxCuda += totalThreads){
        // The part that is parallelized: one integrand sample per iteration,
        // evaluated at the loop index (not at the thread's starting index).
        const double x = ((double)idxCuda + 0.5) * passo;
        parcial += 4.0 / (1.0 + x * x);
    }
    // One atomic per thread: each thread publishes its private partial sum, so
    // concurrent updates of the shared accumulator cannot be lost.
    atomicAddDouble(valor, parcial);
}

// Allocates the managed buffers, runs GPUFunction over 2147480000 integration
// steps, and prints the resulting estimate of pi. Returns 0 on success and 1
// if any CUDA call fails (the failure is reported on stderr).
int main(int argc, char** argv){
    (void)argc;
    (void)argv;
    const double passos = 2147480000.0;
    int status = 1;

    if (cudaOk(cudaMallocManaged(&count, n * sizeof(int)), "cudaMallocManaged(count)") &&
        cudaOk(cudaMallocManaged(&valor, n * sizeof(double)), "cudaMallocManaged(valor)")) {
        // Managed memory is not guaranteed to be zeroed; the kernel only adds.
        *count = 0;
        *valor = 0.0;

        GPUFunction<<<(unsigned int)number_of_blocks, (unsigned int)threads_per_block>>>(passos, count, valor);

        // cudaGetLastError reports launch-configuration errors; the
        // synchronize reports faults raised while the kernel was running and
        // makes *valor safe to read on the host.
        if (cudaOk(cudaGetLastError(), "GPUFunction launch") &&
            cudaOk(cudaDeviceSynchronize(), "GPUFunction execution")) {
            const double pi = *valor * (1.0 / passos);
            printf("%f\n", pi);
            status = 0;
        }
    }

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(valor);
    cudaFree(count);
    return status;
}