-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathno_mpi_modification.f90
125 lines (82 loc) · 2.8 KB
/
no_mpi_modification.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
!> Benchmark driver: loads the input arrays, then times `trial` repetitions of
!> an OpenMP-parallel sweep that accumulates indirect-indexed products into X,
!> and prints the elapsed wall-clock time and a derived rate.
!>
!> Usage: no_mpi_modification [num_threads]
!>   num_threads  optional positive integer; when it is absent or cannot be
!>                parsed, the default of 8 threads is used.
!>
!> NOTE(review): num_of_threads, num1char, Length, N, M, ma, mi, trial,
!> N_Length and N_Loops, as well as the procedures LoadArray, LoadGLOSEG,
!> LoadIndexes, LoadSpareMatrix, timing and dummy, are not declared in this
!> program unit; they are presumably provided by the ReadFile / PrintAll
!> modules -- confirm there. In particular `trial` is never assigned here.
program no_mpi_modification
  use ReadFile
  use PrintAll
  use omp_lib
  use, intrinsic :: iso_c_binding
  implicit none

  ! Thread count used when no (valid) command-line argument is supplied.
  integer, parameter :: default_num_threads = 8

  integer :: c
  integer :: i, j
  integer :: arg_status
  double precision :: wct_start, wct_end, cput_start, cput_end, runtime
  double precision, dimension(:), allocatable :: X, XA1, XA2, Y
  integer, dimension(:), allocatable :: L1, L1_1, L2, L2_1
  integer, dimension(:), allocatable :: VAL_1, VAL_2
  integer, dimension(:), allocatable :: G1, G2
  integer, dimension(:), allocatable :: row1, row2, COL_1, COL_2

  ! --- Thread count from the command line --------------------------------
  ! Only parse argument 1 when it exists: a list-directed read from a blank
  ! string fails at run time, which would make the default unreachable.
  num_of_threads = default_num_threads
  if (command_argument_count() >= 1) then
    call get_command_argument(1, num1char)
    read (num1char, *, iostat=arg_status) num_of_threads
    ! A failed read may leave num_of_threads undefined, and a non-positive
    ! value is not a valid thread count, so fall back in both cases.
    if (arg_status /= 0 .or. num_of_threads < 1) then
      print *, "Warning: invalid thread count argument, using default of ", &
               default_num_threads
      num_of_threads = default_num_threads
    end if
  end if

  ! i is tested after the timed loop below, so give it a defined value.
  i = 0
  c = 0

  ! Problem sizes (hard-coded for the data set being loaded).
  Length = 12 * 100 * 1000
  N = 1140404
  M = 1140407

  ! --- Load input data ----------------------------------------------------
  call LoadArray(X, XA1, XA2, Y)
  call LoadGLOSEG(G1, G2, Length)
  call LoadIndexes(L1, L1_1, L2, L2_1, VAL_1, &
                   VAL_2, COL_1, COL_2, row1, row2, &
                   G1, G2, ma, mi)
  call LoadSpareMatrix(L1, L1_1, L2, L2_1, &
                       VAL_1, VAL_2, COL_1, COL_2, row1, &
                       row2, G1, G2, mi)

  call omp_set_num_threads(num_of_threads)

  ! The L* arrays are not referenced by the timed kernel; release them early.
  deallocate (L1)
  deallocate (L1_1)
  deallocate (L2)
  deallocate (L2_1)

  N_Length = ma - mi + 1
  N_Loops = N

  ! --- Timed kernel -------------------------------------------------------
  call timing(wct_start, cput_start)
  do c = 1, trial
    ! Each iteration i updates only X(i), so iterations are independent.
    ! row1/row2 are used as row-pointer arrays: entries row(i)..row(i+1)-1
    ! belong to row i; VAL_* index into the coefficient arrays XA1/XA2 and
    ! COL_* index into Y. The inner loop index j is private per thread under
    ! the OpenMP rules for sequential loop variables in Fortran.
    !$omp parallel do schedule(static)
    !!$omp parallel do schedule(dynamic)
    do i = 1, ma - mi + 1
      do j = row1(i), row1(i + 1) - 1
        X(i) = X(i) + XA1(VAL_1(j)) * Y(COL_1(j))
      end do
      do j = row2(i), row2(i + 1) - 1
        X(i) = X(i) + XA2(VAL_2(j)) * Y(COL_2(j))
      end do
    end do
    !$omp end parallel do

    ! NOTE(review): this guard is not expected to be true for positive M;
    ! presumably it exists only so the compiler cannot discard the timed
    ! loop as dead code -- confirm against the definition of dummy.
    if (i - M > M) then
      call dummy(X, XA1, XA2, Y)
    end if
  end do
  call timing(wct_end, cput_end)

  ! --- Report -------------------------------------------------------------
  runtime = wct_end - wct_start
  print *, "Time = ", runtime, "seconds"
  ! NOTE(review): the rate below counts 2 operations per N_Loops (= N) per
  ! trial, not per stored matrix entry -- confirm this is the intended metric.
  print *,"Performance: ", dble(trial)*N_Loops*2/runtime/1000000.d0," MFlop/s"

  ! --- Cleanup ------------------------------------------------------------
  deallocate (G1)
  deallocate (XA1)
  deallocate (XA2)
  deallocate (Y)
  deallocate (G2)
  deallocate (X)
  deallocate (VAL_1)
  deallocate (COL_1)
  deallocate (VAL_2)
  deallocate (COL_2)
  deallocate (row1)
  deallocate (row2)
end program no_mpi_modification