FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_solver_las_22.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
7
8 private
9
10 public :: hecmw_matvec_22
13 public :: hecmw_matresid_22
14 public :: hecmw_rel_resid_l2_22
15 public :: hecmw_tvec_22
16 public :: hecmw_ttvec_22
17 public :: hecmw_ttmattvec_22
18 public :: hecmw_mat_diag_sr_22
19
20 ! ! for communication hiding in matvec
21 ! integer(kind=kint), save, allocatable :: index_o(:), item_o(:)
22 ! real(kind=kreal), save, allocatable :: A_o(:)
23 logical, save :: async_matvec_flg = .false.
24contains
25
26 !C
27 !C***
28 !C*** hecmw_matvec_22
29 !C***
30 !C
31 subroutine hecmw_matvec_22 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
32 use hecmw_util
34 implicit none
35 type (hecmwst_local_mesh), intent(in) :: hecmesh
36 type (hecmwst_matrix), intent(in), target :: hecmat
37 real(kind=kreal), intent(in) :: x(:)
38 real(kind=kreal), intent(out) :: y(:)
39 real(kind=kreal), intent(inout) :: time_ax
40 real(kind=kreal), intent(inout), optional :: commtime
41
42 real(kind=kreal) :: tcomm
43 real(kind=kreal), allocatable :: wk(:)
44
45 tcomm = 0.d0
46
47 if (hecmw_mat_get_flag_mpcmatvec(hecmat) /= 0) then
48 allocate(wk(hecmat%NP * hecmat%NDOF))
49 call hecmw_ttmattvec_22(hecmesh, hecmat, x, y, wk, tcomm)
50 deallocate(wk)
51 else
52 call hecmw_matvec_22_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
53 endif
54
55 if (present(commtime)) commtime = commtime + tcomm
56 end subroutine hecmw_matvec_22
57
58 subroutine hecmw_matvec_22_set_async (hecMAT)
59 use hecmw_util
60 implicit none
61 type (hecmwst_matrix), intent(in) :: hecmat
62 end subroutine hecmw_matvec_22_set_async
64 implicit none
65 end subroutine hecmw_matvec_22_unset_async
66
67 !C
68 !C***
69 !C*** hecmw_matvec_22_inner ( private subroutine )
70 !C***
71 !C
72 subroutine hecmw_matvec_22_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
73 use hecmw_util
79 !$ use omp_lib
80
81 implicit none
82 type (hecmwst_local_mesh), intent(in) :: hecmesh
83 type (hecmwst_matrix), intent(in), target :: hecmat
84 real(kind=kreal), intent(in) :: x(:)
85 real(kind=kreal), intent(out) :: y(:)
86 real(kind=kreal), intent(inout) :: time_ax
87 real(kind=kreal), intent(inout), optional :: commtime
88
89 real(kind=kreal) :: start_time, end_time, tcomm
90 integer(kind=kint) :: i, j, js, je, in
91 real(kind=kreal) :: yv1, yv2, x1, x2
92
93 integer(kind=kint) :: n, np
94 integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
95 real(kind=kreal), pointer :: al(:), au(:), d(:)
96
97 ! added for turning >>>
98 integer, parameter :: numofblockperthread = 100
99 logical, save :: isfirst = .true.
100 integer, save :: numofthread = 1
101 integer, save, allocatable :: startpos(:), endpos(:)
102 integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
103 integer(kind=kint) :: threadnum, blocknum, numofblock
104 integer(kind=kint) :: numofelement, elementcount, blockindex
105 real(kind=kreal) :: numofelementperblock
106 ! <<< added for turning
107
108 if (hecmw_jad_is_initialized().ne.0) then
109 tcomm = 0.d0
110 start_time = hecmw_wtime()
111 call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
112 end_time = hecmw_wtime()
113 time_ax = time_ax + end_time - start_time - tcomm
114 if (present(commtime)) commtime = commtime + tcomm
115 else
116
117 n = hecmat%N
118 np = hecmat%NP
119 indexl => hecmat%indexL
120 indexu => hecmat%indexU
121 iteml => hecmat%itemL
122 itemu => hecmat%itemU
123 al => hecmat%AL
124 au => hecmat%AU
125 d => hecmat%D
126
127 ! added for turning >>>
128 if (.not. isfirst) then
129 numofblock = numofthread * numofblockperthread
130 if (endpos(numofblock-1) .ne. n-1) then
131 deallocate(startpos, endpos)
132 isfirst = .true.
133 endif
134 endif
135 if (isfirst) then
136 !$ numOfThread = omp_get_max_threads()
137 numofblock = numofthread * numofblockperthread
138 allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
139 numofelement = n + indexl(n) + indexu(n)
140 numofelementperblock = dble(numofelement) / numofblock
141 blocknum = 0
142 elementcount = 0
143 startpos(blocknum) = 1
144 do i= 1, n
145 elementcount = elementcount + 1
146 elementcount = elementcount + (indexl(i) - indexl(i-1))
147 elementcount = elementcount + (indexu(i) - indexu(i-1))
148 if (elementcount > (blocknum + 1) * numofelementperblock) then
149 endpos(blocknum) = i
150 blocknum = blocknum + 1
151 startpos(blocknum) = i + 1
152 if (blocknum == (numofblock - 1)) exit
153 endif
154 enddo
155 endpos(blocknum) = n
156 ! for irregular data
157 do i= blocknum+1, numofblock-1
158 startpos(i) = n
159 endpos(i) = n-1
160 end do
161
163 sectorcachesize0, sectorcachesize1)
164
165 isfirst = .false.
166 endif
167 ! <<< added for turning
168
169 start_time= hecmw_wtime()
170 call hecmw_update_2_r (hecmesh, x, np)
171 end_time= hecmw_wtime()
172 if (present(commtime)) commtime = commtime + end_time - start_time
173
174 start_time = hecmw_wtime()
175
176 !call fapp_start("loopInMatvec33", 1, 0)
177 !call start_collection("loopInMatvec33")
178
179 !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
180 !OCL CACHE_SUBSECTOR_ASSIGN(X)
181
182 !$OMP PARALLEL DEFAULT(NONE) &
183 !$OMP&PRIVATE(i,X1,X2,YV1,YV2,jS,jE,j,in,threadNum,blockNum,blockIndex) &
184 !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread,N,async_matvec_flg)
185 threadnum = 0
186 !$ threadNum = omp_get_thread_num()
187 do blocknum = 0 , numofblockperthread - 1
188 blockindex = blocknum * numofthread + threadnum
189 do i = startpos(blockindex), endpos(blockindex)
190 x1= x(2*i-1)
191 x2= x(2*i )
192 yv1= d(4*i-3)*x1 + d(4*i-2)*x2
193 yv2= d(4*i-1)*x1 + d(4*i )*x2
194
195 js= indexl(i-1) + 1
196 je= indexl(i )
197 do j= js, je
198 in = iteml(j)
199 x1 = x(2*in-1)
200 x2 = x(2*in )
201 yv1= yv1 + al(4*j-3)*x1 + al(4*j-2)*x2
202 yv2= yv2 + al(4*j-1)*x1 + al(4*j )*x2
203 enddo
204 js= indexu(i-1) + 1
205 je= indexu(i )
206 do j= js, je
207 in = itemu(j)
208 x1 = x(2*in-1)
209 x2 = x(2*in )
210 yv1= yv1 + au(4*j-3)*x1 + au(4*j-2)*x2
211 yv2= yv2 + au(4*j-1)*x1 + au(4*j )*x2
212 enddo
213 y(2*i-1)= yv1
214 y(2*i )= yv2
215 enddo
216 enddo
217
218 !$OMP END PARALLEL
219
220 !OCL END_CACHE_SUBSECTOR
221 !OCL END_CACHE_SECTOR_SIZE
222
223
224 end_time = hecmw_wtime()
225 time_ax = time_ax + end_time - start_time
226
227
228 endif
229
230 if (hecmat%cmat%n_val > 0) then
231 call hecmw_cmat_multvec_add( hecmat%cmat, x, y, np * hecmat%NDOF )
232 end if
233
234 end subroutine hecmw_matvec_22_inner
235
236
237
238
239
240
241 !C
242 !C***
243 !C*** hecmw_matresid_22
244 !C***
245 !C
246 subroutine hecmw_matresid_22 (hecMESH, hecMAT, X, B, R, COMMtime)
247 use hecmw_util
248
249 implicit none
250 real(kind=kreal) :: x(:), b(:), r(:)
251 type (hecmwst_matrix) :: hecmat
252 type (hecmwst_local_mesh) :: hecmesh
253 real(kind=kreal), optional :: commtime
254
255 integer(kind=kint) :: i
256 real(kind=kreal) :: tcomm
257
258 tcomm = 0.d0
259 call hecmw_matvec_22 (hecmesh, hecmat, x, r, tcomm)
260 if (present(commtime)) commtime = commtime + tcomm
261 !$omp parallel default(none),private(i),shared(hecMAT,R,B)
262 !$omp do
263 do i = 1, hecmat%N * 2
264 r(i) = b(i) - r(i)
265 enddo
266 !$omp end do
267 !$omp end parallel
268 end subroutine hecmw_matresid_22
269
270 !C
271 !C***
272 !C*** hecmw_rel_resid_L2_22
273 !C***
274 !C
275 function hecmw_rel_resid_l2_22 (hecMESH, hecMAT, COMMtime)
276 use hecmw_util
278
279 implicit none
280 real(kind=kreal) :: hecmw_rel_resid_l2_22
281 type ( hecmwst_local_mesh ), intent(in) :: hecmesh
282 type ( hecmwst_matrix ), intent(in) :: hecmat
283 real(kind=kreal), optional :: commtime
284
285 real(kind=kreal) :: r(hecmat%NDOF*hecmat%NP)
286 real(kind=kreal) :: bnorm2, rnorm2
287 real(kind=kreal) :: tcomm
288
289 tcomm = 0.d0
290 call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, hecmat%B, hecmat%B, bnorm2, tcomm)
291 if (bnorm2 == 0.d0) then
292 bnorm2 = 1.d0
293 endif
294 call hecmw_matresid_22(hecmesh, hecmat, hecmat%X, hecmat%B, r, tcomm)
295 call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
296 if (present(commtime)) commtime = commtime + tcomm
297
298 hecmw_rel_resid_l2_22 = sqrt(rnorm2 / bnorm2)
299
300 end function hecmw_rel_resid_l2_22
301 !C
302 !C***
303 !C*** hecmw_Tvec_22
304 !C***
305 !C
306 subroutine hecmw_tvec_22 (hecMESH, X, Y, COMMtime)
307 use hecmw_util
309 implicit none
310 type (hecmwst_local_mesh), intent(in) :: hecmesh
311 real(kind=kreal), intent(in) :: x(:)
312 real(kind=kreal), intent(out) :: y(:)
313 real(kind=kreal), intent(inout) :: commtime
314
315 real(kind=kreal) :: start_time, end_time
316 integer(kind=kint) :: i, j, jj, k, kk
317
318 start_time= hecmw_wtime()
319 call hecmw_update_3_r (hecmesh, x, hecmesh%n_node)
320 end_time= hecmw_wtime()
321 commtime = commtime + end_time - start_time
322
323 !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
324 !$omp do
325 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
326 y(i)= x(i)
327 enddo
328 !$omp end do
329
330 !$omp do
331 outer: do i= 1, hecmesh%mpc%n_mpc
332 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
333 if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
334 enddo
335 k = hecmesh%mpc%mpc_index(i-1) + 1
336 kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
337 y(kk) = 0.d0
338 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
339 jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
340 y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
341 enddo
342 enddo outer
343 !$omp end do
344 !$omp end parallel
345
346 end subroutine hecmw_tvec_22
347
348 !C
349 !C***
350 !C*** hecmw_Ttvec_22
351 !C***
352 !C
353 subroutine hecmw_ttvec_22 (hecMESH, X, Y, COMMtime)
354 use hecmw_util
356 implicit none
357 type (hecmwst_local_mesh), intent(in) :: hecmesh
358 real(kind=kreal), intent(in) :: x(:)
359 real(kind=kreal), intent(out) :: y(:)
360 real(kind=kreal), intent(inout) :: commtime
361
362 real(kind=kreal) :: start_time, end_time
363 integer(kind=kint) :: i, j, jj, k, kk
364
365 start_time= hecmw_wtime()
366 call hecmw_update_2_r (hecmesh, x, hecmesh%n_node)
367 end_time= hecmw_wtime()
368 commtime = commtime + end_time - start_time
369
370 !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
371 !$omp do
372 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
373 y(i)= x(i)
374 enddo
375 !$omp end do
376
377 !$omp do
378 outer: do i= 1, hecmesh%mpc%n_mpc
379 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
380 if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
381 enddo
382 k = hecmesh%mpc%mpc_index(i-1) + 1
383 kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
384 y(kk) = 0.d0
385 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
386 jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
387 y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
388 enddo
389 enddo outer
390 !$omp end do
391 !$omp end parallel
392
393 end subroutine hecmw_ttvec_22
394
395 !C
396 !C***
397 !C*** hecmw_TtmatTvec_22
398 !C***
399 !C
400 subroutine hecmw_ttmattvec_22 (hecMESH, hecMAT, X, Y, W, COMMtime)
401 use hecmw_util
402 implicit none
403 type (hecmwst_local_mesh), intent(in) :: hecmesh
404 type (hecmwst_matrix), intent(in) :: hecmat
405 real(kind=kreal), intent(in) :: x(:)
406 real(kind=kreal), intent(out) :: y(:), w(:)
407 real(kind=kreal), intent(inout) :: commtime
408
409 call hecmw_tvec_22(hecmesh, x, y, commtime)
410 call hecmw_matvec_22_inner(hecmesh, hecmat, y, w, commtime)
411 call hecmw_ttvec_22(hecmesh, w, y, commtime)
412
413 end subroutine hecmw_ttmattvec_22
414
415
416 !C
417 !C***
418 !C*** hecmw_mat_diag_sr_22
419 !C***
420 !C
421 subroutine hecmw_mat_diag_sr_22(hecMESH, hecMAT, COMMtime)
422 use hecmw_util
424 implicit none
425 type (hecmwst_local_mesh), intent(in) :: hecmesh
426 type (hecmwst_matrix), intent(inout), target :: hecmat
427 real(kind=kreal), intent(inout), optional :: commtime
428 real(kind=kreal), allocatable :: w(:,:)
429 real(kind=kreal), pointer :: d(:)
430 integer(kind=kint) :: ip
431 real(kind=kreal) :: start_time, end_time
432 allocate(w(2*hecmat%NP,2))
433 d => hecmat%D
434 do ip= 1, hecmat%N
435 w(2*ip-1,1)= d(4*ip-3); w(2*ip-1,2)= d(4*ip-2);
436 w(2*ip-0,1)= d(4*ip-1); w(2*ip-0,2)= d(4*ip-0);
437 enddo
438 start_time= hecmw_wtime()
439 call hecmw_update_2_r (hecmesh, w(:,1), hecmat%NP)
440 call hecmw_update_2_r (hecmesh, w(:,2), hecmat%NP)
441 end_time= hecmw_wtime()
442 if (present(commtime)) commtime = commtime + end_time - start_time
443 do ip= hecmat%N+1, hecmat%NP
444 d(4*ip-3)= w(2*ip-1,1); d(4*ip-2)= w(2*ip-1,2);
445 d(4*ip-1)= w(2*ip-0,1); d(4*ip-0)= w(2*ip-0,2);
446 enddo
447 deallocate(w)
448 end subroutine hecmw_mat_diag_sr_22
449end module hecmw_solver_las_22
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
integer(kind=kint) function, public hecmw_jad_is_initialized()
Definition: hecmw_jadm.f90:56
subroutine, public hecmw_jad_matvec(hecmesh, hecmat, x, y, commtime)
Definition: hecmw_jadm.f90:61
subroutine, public hecmw_cmat_multvec_add(cmat, x, y, len)
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecmat)
real(kind=kreal) function, public hecmw_rel_resid_l2_22(hecmesh, hecmat, commtime)
subroutine, public hecmw_matvec_22(hecmesh, hecmat, x, y, time_ax, commtime)
subroutine, public hecmw_ttvec_22(hecmesh, x, y, commtime)
subroutine, public hecmw_ttmattvec_22(hecmesh, hecmat, x, y, w, commtime)
subroutine, public hecmw_matresid_22(hecmesh, hecmat, x, b, r, commtime)
subroutine, public hecmw_matvec_22_set_async(hecmat)
subroutine, public hecmw_mat_diag_sr_22(hecmesh, hecmat, commtime)
subroutine, public hecmw_matvec_22_unset_async
subroutine, public hecmw_tvec_22(hecmesh, x, y, commtime)
subroutine hecmw_innerproduct_r(hecmesh, ndof, x, y, sum, commtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_2_r(hecmesh, val, n)
subroutine hecmw_update_3_r(hecmesh, val, n)