FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_precond_SSOR_nn.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
6!C
7!C***
8!C*** module hecmw_precond_SSOR_nn
9!C***
10!C
12 use hecmw_util
18 !$ use omp_lib
19
20 private
21
25
26 integer(kind=kint) :: N
27 real(kind=kreal), pointer :: d(:) => null()
28 real(kind=kreal), pointer :: al(:) => null()
29 real(kind=kreal), pointer :: au(:) => null()
30 integer(kind=kint), pointer :: indexL(:) => null()
31 integer(kind=kint), pointer :: indexU(:) => null()
32 integer(kind=kint), pointer :: itemL(:) => null()
33 integer(kind=kint), pointer :: itemU(:) => null()
34 real(kind=kreal), pointer :: alu(:) => null()
35
36 integer(kind=kint) :: NContact = 0
37 real(kind=kreal), pointer :: cal(:) => null()
38 real(kind=kreal), pointer :: cau(:) => null()
39 integer(kind=kint), pointer :: indexCL(:) => null()
40 integer(kind=kint), pointer :: indexCU(:) => null()
41 integer(kind=kint), pointer :: itemCL(:) => null()
42 integer(kind=kint), pointer :: itemCU(:) => null()
43
44 integer(kind=kint) :: NColor
45 integer(kind=kint), pointer :: COLORindex(:) => null()
46 integer(kind=kint), pointer :: perm(:) => null()
47 integer(kind=kint), pointer :: iperm(:) => null()
48
49 logical, save :: isFirst = .true.
50
51 logical, save :: INITIALIZED = .false.
52
53contains
54
55 subroutine hecmw_precond_ssor_nn_setup(hecMAT)
56 implicit none
57 type(hecmwst_matrix), intent(inout) :: hecmat
58 integer(kind=kint ) :: npl, npu, npcl, npcu
59 real (kind=kreal), allocatable :: cd(:)
60 integer(kind=kint ) :: ncolor_in
61 real (kind=kreal) :: sigma_diag
62 real (kind=kreal) :: alutmp(hecmat%NDOF,hecmat%NDOF), pw(hecmat%NDOF)
63 integer(kind=kint ) :: ii, i, j, k, ndof, ndof2
64 integer(kind=kint ) :: nthreads = 1
65 integer(kind=kint ), allocatable :: perm_tmp(:)
66 !real (kind=kreal) :: t0
67
68 !t0 = hecmw_Wtime()
69 !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
70
71 if (initialized) then
72 if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
74 else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
75 call hecmw_precond_ssor_nn_clear(hecmat) ! TEMPORARY
76 else
77 return
78 endif
79 endif
80
81 !$ nthreads = omp_get_max_threads()
82
83 n = hecmat%N
84 ndof=hecmat%NDOF
85 ndof2=ndof*ndof
86
87 ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
88 sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
89 ncontact = hecmat%cmat%n_val
90
91 if (ncontact.gt.0) then
92 call hecmw_cmat_lu( hecmat )
93 endif
94
95 if (nthreads == 1) then
96 ncolor = 1
97 allocate(colorindex(0:1), perm(n), iperm(n))
98 colorindex(0) = 0
99 colorindex(1) = n
100 do i=1,n
101 perm(i) = i
102 iperm(i) = i
103 end do
104 else
105 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
106 call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
107 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
108 !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
109 call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
110 hecmat%indexU, hecmat%itemU, perm_tmp, &
111 ncolor_in, ncolor, colorindex, perm, iperm)
112 !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
113 deallocate(perm_tmp)
114
115 !call write_debug_info
116 endif
117
118 npl = hecmat%indexL(n)
119 npu = hecmat%indexU(n)
120 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
121 call hecmw_matrix_reorder_profile(n, perm, iperm, &
122 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
123 indexl, indexu, iteml, itemu)
124 !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
125
126 !call check_ordering
127
128 allocate(d(ndof2*n), al(ndof2*npl), au(ndof2*npu))
129 call hecmw_matrix_reorder_values(n, ndof, perm, iperm, &
130 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
131 hecmat%AL, hecmat%AU, hecmat%D, &
132 indexl, indexu, iteml, itemu, al, au, d)
133 !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
134
135 call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
136 call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
137
138 if (ncontact.gt.0) then
139 npcl = hecmat%indexCL(n)
140 npcu = hecmat%indexCU(n)
141 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
142 call hecmw_matrix_reorder_profile(n, perm, iperm, &
143 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
144 indexcl, indexcu, itemcl, itemcu)
145
146 allocate(cd(ndof2*n), cal(ndof2*npcl), cau(ndof2*npcu))
147 call hecmw_matrix_reorder_values(n, ndof, perm, iperm, &
148 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
149 hecmat%CAL, hecmat%CAU, hecmat%D, &
150 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
151 deallocate(cd)
152
153 call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
154 call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
155 end if
156
157 allocate(alu(ndof2*n))
158 alu = 0.d0
159
160 do ii= 1, ndof2*n
161 alu(ii) = d(ii)
162 enddo
163
164 if (ncontact.gt.0) then
165 do k= 1, hecmat%cmat%n_val
166 if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
167 ii = iperm( hecmat%cmat%pair(k)%i )
168 do i = 1, ndof
169 do j = 1, ndof
170 alu(ndof2*(ii-1)+ndof*(i-1)+j) = alu(ndof2*(ii-1)+ndof*(i-1)+j) + hecmat%cmat%pair(k)%val(i, j)
171 end do
172 end do
173 enddo
174 endif
175
176 !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,NDOF,NDOF2,ALU,SIGMA_DIAG)
177 !$omp do
178 do ii= 1, n
179 do i = 1, ndof
180 do j = 1, ndof
181 alutmp(i,j) = alu(ndof2*(ii-1)+(i-1)*ndof+j)
182 if (i==j) alutmp(i,j)=alutmp(i,j)*sigma_diag
183 end do
184 end do
185 do k= 1, ndof
186 alutmp(k,k)= 1.d0/alutmp(k,k)
187 do i= k+1, ndof
188 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
189 do j= k+1, ndof
190 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
191 enddo
192 do j= k+1, ndof
193 alutmp(i,j)= pw(j)
194 enddo
195 enddo
196 enddo
197 do i = 1, ndof
198 do j = 1, ndof
199 alu(ndof2*(ii-1)+(i-1)*ndof+j)= alutmp(i,j)
200 end do
201 end do
202 enddo
203 !$omp end do
204 !$omp end parallel
205
206 isfirst = .true.
207
208 initialized = .true.
209 hecmat%Iarray(98) = 0 ! symbolic setup done
210 hecmat%Iarray(97) = 0 ! numerical setup done
211
212 !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
213
214 end subroutine hecmw_precond_ssor_nn_setup
215
216 subroutine hecmw_precond_ssor_nn_apply(ZP, NDOF)
218 implicit none
219 real(kind=kreal), intent(inout) :: zp(:)
220 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k, ndof, ndof2, idof,jdof
221 real(kind=kreal) :: sw(ndof), x(ndof)
222
223 ! added for turning >>>
224 integer(kind=kint), parameter :: numofblockperthread = 100
225 integer(kind=kint), save :: numofthread = 1, numofblock
226 integer(kind=kint), save, allocatable :: ictoblockindex(:)
227 integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
228 integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
229 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
230 real(kind=kreal) :: numofelementperblock
231 integer(kind=kint) :: my_rank
232
233 ndof2=ndof*ndof
234 if (isfirst) then
235 !$ numOfThread = omp_get_max_threads()
236 numofblock = numofthread * numofblockperthread
237 if (allocated(ictoblockindex)) deallocate(ictoblockindex)
238 if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
239 allocate (ictoblockindex(0:ncolor), &
240 blockindextocolorindex(0:numofblock + ncolor))
241 numofelement = n + indexl(n) + indexu(n)
242 numofelementperblock = dble(numofelement) / numofblock
243 blockindex = 0
244 ictoblockindex = -1
245 ictoblockindex(0) = 0
246 blockindextocolorindex = -1
247 blockindextocolorindex(0) = 0
248 my_rank = hecmw_comm_get_rank()
249 ! write(9000+my_rank,*) &
250 ! '# numOfElementPerBlock =', numOfElementPerBlock
251 ! write(9000+my_rank,*) &
252 ! '# ic, blockIndex, colorIndex, elementCount'
253 do ic = 1, ncolor
254 elementcount = 0
255 ii = 1
256 do i = colorindex(ic-1)+1, colorindex(ic)
257 elementcount = elementcount + 1
258 elementcount = elementcount + (indexl(i) - indexl(i-1))
259 elementcount = elementcount + (indexu(i) - indexu(i-1))
260 if (elementcount > ii * numofelementperblock &
261 .or. i == colorindex(ic)) then
262 ii = ii + 1
263 blockindex = blockindex + 1
264 blockindextocolorindex(blockindex) = i
265 ! write(9000+my_rank,*) ic, blockIndex, &
266 ! blockIndexToColorIndex(blockIndex), elementCount
267 endif
268 enddo
269 ictoblockindex(ic) = blockindex
270 enddo
271 numofblock = blockindex
272
273 call hecmw_tuning_fx_calc_sector_cache( n, ndof, &
274 sectorcachesize0, sectorcachesize1 )
275
276 isfirst = .false.
277 endif
278 ! <<< added for turning
279
280 !call start_collection("loopInPrecond33")
281
282 !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
283 !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
284
285 !$omp parallel default(none) &
286 !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
287 !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
288 !$omp& ZP,icToBlockIndex,blockIndexToColorIndex,NDOF,NDOF2) &
289 !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex,idof,jdof)
290
291 !C-- FORWARD
292 do ic=1,ncolor
293 !$omp do schedule (static, 1)
294 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
295 do i = blockindextocolorindex(blockindex-1)+1, &
296 blockindextocolorindex(blockindex)
297 iold = perm(i)
298 do idof = 1, ndof
299 sw(idof) = zp(ndof*(iold-1)+idof)
300 end do
301 isl= indexl(i-1)+1
302 iel= indexl(i)
303 do j= isl, iel
304 k= iteml(j)
305 do idof = 1, ndof
306 x(idof) = zp(ndof*(k-1)+idof)
307 end do
308 do idof = 1, ndof
309 do jdof = 1, ndof
310 sw(idof) = sw(idof) - al(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
311 end do
312 end do
313 enddo ! j
314
315 if (ncontact.ne.0) then
316 isl= indexcl(i-1)+1
317 iel= indexcl(i)
318 do j= isl, iel
319 k= itemcl(j)
320 do idof = 1, ndof
321 x(idof) = zp(ndof*(k-1)+idof)
322 end do
323 do idof = 1, ndof
324 do jdof = 1, ndof
325 sw(idof) = sw(idof) - cal(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
326 end do
327 end do
328 enddo ! j
329 endif
330
331 x = sw
332 do idof = 2,ndof
333 do jdof = 1, idof-1
334 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof )*x(jdof)
335 end do
336 end do
337 do idof = ndof, 1, -1
338 do jdof = ndof, idof+1, -1
339 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof)*x(jdof)
340 end do
341 x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
342 end do
343 zp(ndof*(iold-1)+1:ndof*(iold-1)+ndof) = x(1:ndof)
344
345 enddo ! i
346 enddo ! blockIndex
347 !$omp end do
348 enddo ! ic
349
350 !C-- BACKWARD
351 do ic=ncolor, 1, -1
352 !$omp do schedule (static, 1)
353 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
354 do i = blockindextocolorindex(blockindex), &
355 blockindextocolorindex(blockindex-1)+1, -1
356 ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
357 ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
358 ! blockIndexToColorIndex(blockIndex)
359 ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
360 sw= 0.d0
361 isu= indexu(i-1) + 1
362 ieu= indexu(i)
363 do j= ieu, isu, -1
364 k= itemu(j)
365 do idof = 1, ndof
366 x(idof) = zp(ndof*(k-1)+idof)
367 end do
368 do idof = 1, ndof
369 do jdof = 1, ndof
370 sw(idof) = sw(idof) + au(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
371 end do
372 end do
373 enddo ! j
374
375 if (ncontact.gt.0) then
376 isu= indexcu(i-1) + 1
377 ieu= indexcu(i)
378 do j= ieu, isu, -1
379 k= itemcu(j)
380 do idof = 1, ndof
381 x(idof) = zp(ndof*(k-1)+idof)
382 end do
383 do idof = 1, ndof
384 do jdof = 1, ndof
385 sw(idof) = sw(idof) + cau(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
386 end do
387 end do
388 enddo ! j
389 endif
390
391 x = sw
392 do idof = 2, ndof
393 do k = 1,idof-1
394 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
395 end do
396 end do
397 do idof = ndof, 1, -1
398 do k = ndof, idof+1, -1
399 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
400 end do
401 x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
402 end do
403 iold = perm(i)
404 do idof = 1, ndof
405 zp(ndof*(iold-1)+idof) = zp(ndof*(iold-1)+idof) - x(idof)
406 end do
407 enddo ! i
408 enddo ! blockIndex
409 !$omp end do
410 enddo ! ic
411 !$omp end parallel
412
413 !OCL END_CACHE_SUBSECTOR
414 !OCL END_CACHE_SECTOR_SIZE
415
416 !call stop_collection("loopInPrecond33")
417
418 end subroutine hecmw_precond_ssor_nn_apply
419
421 implicit none
422 type(hecmwst_matrix), intent(inout) :: hecmat
423 integer(kind=kint ) :: nthreads = 1
424 !$ nthreads = omp_get_max_threads()
425 if (associated(colorindex)) deallocate(colorindex)
426 if (associated(perm)) deallocate(perm)
427 if (associated(iperm)) deallocate(iperm)
428 if (associated(alu)) deallocate(alu)
429 if (nthreads >= 1) then
430 if (associated(d)) deallocate(d)
431 if (associated(al)) deallocate(al)
432 if (associated(au)) deallocate(au)
433 if (associated(indexl)) deallocate(indexl)
434 if (associated(indexu)) deallocate(indexu)
435 if (associated(iteml)) deallocate(iteml)
436 if (associated(itemu)) deallocate(itemu)
437 if (ncontact.ne.0) then
438 if (associated(cal)) deallocate(cal)
439 if (associated(cau)) deallocate(cau)
440 if (associated(indexcl)) deallocate(indexcl)
441 if (associated(indexcu)) deallocate(indexcu)
442 if (associated(itemcl)) deallocate(itemcl)
443 if (associated(itemcu)) deallocate(itemcu)
444 end if
445 end if
446 nullify(colorindex)
447 nullify(perm)
448 nullify(iperm)
449 nullify(alu)
450 nullify(d)
451 nullify(al)
452 nullify(au)
453 nullify(indexl)
454 nullify(indexu)
455 nullify(iteml)
456 nullify(itemu)
457 if (ncontact.ne.0) then
458 nullify(cal)
459 nullify(cau)
460 nullify(indexcl)
461 nullify(indexcu)
462 nullify(itemcl)
463 nullify(itemcu)
464 call hecmw_cmat_lu_free( hecmat )
465 endif
466 ncontact = 0
467 initialized = .false.
468 end subroutine hecmw_precond_ssor_nn_clear
469
470 subroutine write_debug_info
471 implicit none
472 integer(kind=kint) :: my_rank, ic, in
473 my_rank = hecmw_comm_get_rank()
474 !--------------------> debug: shizawa
475 if (my_rank.eq.0) then
476 write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
477 endif
478 write(19000+my_rank,'(a)') '#NCOLORTot'
479 write(19000+my_rank,*) ncolor
480 write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
481 do ic=1,ncolor
482 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
483 enddo ! ic
484 write(29000+my_rank,'(a)') '#n_node'
485 write(29000+my_rank,*) n
486 write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
487 do in=1,n
488 write(29000+my_rank,*) in, iperm(in), perm(in)
489 if (perm(iperm(in)) .ne. in) then
490 write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
491 endif
492 enddo
493 end subroutine write_debug_info
494
495 subroutine check_ordering
496 implicit none
497 integer(kind=kint) :: ic, i, j, k
498 integer(kind=kint), allocatable :: iicolor(:)
499 ! check color dependence of neighbouring nodes
500 if (ncolor.gt.1) then
501 allocate(iicolor(n))
502 do ic=1,ncolor
503 do i= colorindex(ic-1)+1, colorindex(ic)
504 iicolor(i) = ic
505 enddo ! i
506 enddo ! ic
507 ! FORWARD: L-part
508 do ic=1,ncolor
509 do i= colorindex(ic-1)+1, colorindex(ic)
510 do j= indexl(i-1)+1, indexl(i)
511 k= iteml(j)
512 if (iicolor(i).eq.iicolor(k)) then
513 write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
514 endif
515 enddo ! j
516 enddo ! i
517 enddo ! ic
518 ! BACKWARD: U-part
519 do ic=ncolor, 1, -1
520 do i= colorindex(ic), colorindex(ic-1)+1, -1
521 do j= indexu(i-1)+1, indexu(i)
522 k= itemu(j)
523 if (iicolor(i).eq.iicolor(k)) then
524 write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
525 endif
526 enddo ! j
527 enddo ! i
528 enddo ! ic
529 deallocate(iicolor)
530 endif ! if (NColor.gt.1)
531 !--------------------< debug: shizawa
532 end subroutine check_ordering
533
534end module hecmw_precond_ssor_nn
subroutine, public hecmw_cmat_lu(hecmat)
subroutine, public hecmw_cmat_lu_free(hecmat)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecmat)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecmat)
subroutine, public hecmw_matrix_reorder_renum_item(n, perm, indexxp, itemxp)
subroutine, public hecmw_matrix_reorder_values(n, ndof, perm, iperm, indexl, indexu, iteml, itemu, al, au, d, indexlp, indexup, itemlp, itemup, alp, aup, dp)
subroutine, public hecmw_matrix_reorder_profile(n, perm, iperm, indexl, indexu, iteml, itemu, indexlp, indexup, itemlp, itemup)
subroutine, public hecmw_precond_ssor_nn_setup(hecmat)
subroutine, public hecmw_precond_ssor_nn_clear(hecmat)
subroutine, public hecmw_precond_ssor_nn_apply(zp, ndof)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(n, indexl, iteml, indexu, itemu, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(n, indexl, iteml, indexu, itemu, perm_cur, ncolor_in, ncolor_out, colorindex, perm, iperm)