FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_precond_SSOR_22.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
6!C
7!C***
8!C*** module hecmw_precond_SSOR_22
9!C***
10!C
12 use hecmw_util
18 !$ use omp_lib
19
20 private
21
25
26 integer(kind=kint) :: N
27 real(kind=kreal), pointer :: d(:) => null()
28 real(kind=kreal), pointer :: al(:) => null()
29 real(kind=kreal), pointer :: au(:) => null()
30 integer(kind=kint), pointer :: indexL(:) => null()
31 integer(kind=kint), pointer :: indexU(:) => null()
32 integer(kind=kint), pointer :: itemL(:) => null()
33 integer(kind=kint), pointer :: itemU(:) => null()
34 real(kind=kreal), pointer :: alu(:) => null()
35
36 integer(kind=kint) :: NContact = 0
37 real(kind=kreal), pointer :: cal(:) => null()
38 real(kind=kreal), pointer :: cau(:) => null()
39 integer(kind=kint), pointer :: indexCL(:) => null()
40 integer(kind=kint), pointer :: indexCU(:) => null()
41 integer(kind=kint), pointer :: itemCL(:) => null()
42 integer(kind=kint), pointer :: itemCU(:) => null()
43
44 integer(kind=kint) :: NColor
45 integer(kind=kint), pointer :: COLORindex(:) => null()
46 integer(kind=kint), pointer :: perm(:) => null()
47 integer(kind=kint), pointer :: iperm(:) => null()
48
49 logical, save :: isFirst = .true.
50
51 logical, save :: INITIALIZED = .false.
52
53contains
54
55 subroutine hecmw_precond_ssor_22_setup(hecMAT)
56 implicit none
57 type(hecmwst_matrix), intent(inout) :: hecmat
58 integer(kind=kint ) :: npl, npu, npcl, npcu
59 real (kind=kreal), allocatable :: cd(:)
60 integer(kind=kint ) :: ncolor_in
61 real (kind=kreal) :: sigma_diag
62 real (kind=kreal) :: alutmp(2,2), pw(2)
63 integer(kind=kint ) :: ii, i, j, k
64 integer(kind=kint ) :: nthreads = 1
65 integer(kind=kint ), allocatable :: perm_tmp(:)
66 !real (kind=kreal) :: t0
67
68 !t0 = hecmw_Wtime()
69 !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
70
71 if (initialized) then
72 if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
74 else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
75 call hecmw_precond_ssor_22_clear(hecmat) ! TEMPORARY
76 else
77 return
78 endif
79 endif
80
81 !$ nthreads = omp_get_max_threads()
82
83 n = hecmat%N
84 ! N = hecMAT%NP
85 ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
86 sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
87 ncontact = hecmat%cmat%n_val
88
89 if (ncontact.gt.0) then
90 call hecmw_cmat_lu( hecmat )
91 endif
92
93 if (nthreads == 1) then
94 ncolor = 1
95 allocate(colorindex(0:1), perm(n), iperm(n))
96 colorindex(0) = 0
97 colorindex(1) = n
98 do i=1,n
99 perm(i) = i
100 iperm(i) = i
101 end do
102 else
103 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
104 call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
105 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
106 !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
107 call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
108 hecmat%indexU, hecmat%itemU, perm_tmp, &
109 ncolor_in, ncolor, colorindex, perm, iperm)
110 !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
111 deallocate(perm_tmp)
112
113 !call write_debug_info
114 endif
115
116 npl = hecmat%indexL(n)
117 npu = hecmat%indexU(n)
118 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
119 call hecmw_matrix_reorder_profile(n, perm, iperm, &
120 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
121 indexl, indexu, iteml, itemu)
122 !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
123
124 !call check_ordering
125
126 allocate(d(4*n), al(4*npl), au(4*npu))
127 call hecmw_matrix_reorder_values(n, 2, perm, iperm, &
128 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
129 hecmat%AL, hecmat%AU, hecmat%D, &
130 indexl, indexu, iteml, itemu, al, au, d)
131 !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
132
133 call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
134 call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
135
136 if (ncontact.gt.0) then
137 npcl = hecmat%indexCL(n)
138 npcu = hecmat%indexCU(n)
139 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
140 call hecmw_matrix_reorder_profile(n, perm, iperm, &
141 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
142 indexcl, indexcu, itemcl, itemcu)
143
144 allocate(cd(4*n), cal(4*npcl), cau(4*npcu))
145 call hecmw_matrix_reorder_values(n, 2, perm, iperm, &
146 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
147 hecmat%CAL, hecmat%CAU, hecmat%D, &
148 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
149 deallocate(cd)
150
151 call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
152 call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
153 end if
154
155 allocate(alu(4*n))
156 alu = 0.d0
157
158 do ii= 1, 4*n
159 alu(ii) = d(ii)
160 enddo
161
162 if (ncontact.gt.0) then
163 do k= 1, hecmat%cmat%n_val
164 if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
165 ii = iperm( hecmat%cmat%pair(k)%i )
166 alu(4*ii-3) = alu(4*ii-3) + hecmat%cmat%pair(k)%val(1, 1)
167 alu(4*ii-2) = alu(4*ii-2) + hecmat%cmat%pair(k)%val(1, 2)
168 alu(4*ii-1) = alu(4*ii-1) + hecmat%cmat%pair(k)%val(2, 1)
169 alu(4*ii-0) = alu(4*ii-0) + hecmat%cmat%pair(k)%val(2, 2)
170 enddo
171 endif
172
173 !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,ALU,SIGMA_DIAG)
174 !$omp do
175 do ii= 1, n
176 alutmp(1,1)= alu(4*ii-3) * sigma_diag
177 alutmp(1,2)= alu(4*ii-2)
178 alutmp(2,1)= alu(4*ii-1)
179 alutmp(2,2)= alu(4*ii-0) * sigma_diag
180 do k= 1, 2
181 alutmp(k,k)= 1.d0/alutmp(k,k)
182 do i= k+1, 2
183 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
184 do j= k+1, 2
185 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
186 enddo
187 do j= k+1, 2
188 alutmp(i,j)= pw(j)
189 enddo
190 enddo
191 enddo
192 alu(4*ii-3)= alutmp(1,1)
193 alu(4*ii-2)= alutmp(1,2)
194 alu(4*ii-1)= alutmp(2,1)
195 alu(4*ii-0)= alutmp(2,2)
196 enddo
197 !$omp end do
198 !$omp end parallel
199
200 isfirst = .true.
201
202 initialized = .true.
203 hecmat%Iarray(98) = 0 ! symbolic setup done
204 hecmat%Iarray(97) = 0 ! numerical setup done
205
206 !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
207
208 end subroutine hecmw_precond_ssor_22_setup
209
212 implicit none
213 real(kind=kreal), intent(inout) :: zp(:)
214 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
215 real(kind=kreal) :: sw(2), x(2)
216
217 ! added for turning >>>
218 integer(kind=kint), parameter :: numofblockperthread = 100
219 integer(kind=kint), save :: numofthread = 1, numofblock
220 integer(kind=kint), save, allocatable :: ictoblockindex(:)
221 integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
222 integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
223 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
224 real(kind=kreal) :: numofelementperblock
225 integer(kind=kint) :: my_rank
226
227 if (isfirst) then
228 !$ numOfThread = omp_get_max_threads()
229 numofblock = numofthread * numofblockperthread
230 if (allocated(ictoblockindex)) deallocate(ictoblockindex)
231 if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
232 allocate (ictoblockindex(0:ncolor), &
233 blockindextocolorindex(0:numofblock + ncolor))
234 numofelement = n + indexl(n) + indexu(n)
235 numofelementperblock = dble(numofelement) / numofblock
236 blockindex = 0
237 ictoblockindex = -1
238 ictoblockindex(0) = 0
239 blockindextocolorindex = -1
240 blockindextocolorindex(0) = 0
241 my_rank = hecmw_comm_get_rank()
242 ! write(9000+my_rank,*) &
243 ! '# numOfElementPerBlock =', numOfElementPerBlock
244 ! write(9000+my_rank,*) &
245 ! '# ic, blockIndex, colorIndex, elementCount'
246 do ic = 1, ncolor
247 elementcount = 0
248 ii = 1
249 do i = colorindex(ic-1)+1, colorindex(ic)
250 elementcount = elementcount + 1
251 elementcount = elementcount + (indexl(i) - indexl(i-1))
252 elementcount = elementcount + (indexu(i) - indexu(i-1))
253 if (elementcount > ii * numofelementperblock &
254 .or. i == colorindex(ic)) then
255 ii = ii + 1
256 blockindex = blockindex + 1
257 blockindextocolorindex(blockindex) = i
258 ! write(9000+my_rank,*) ic, blockIndex, &
259 ! blockIndexToColorIndex(blockIndex), elementCount
260 endif
261 enddo
262 ictoblockindex(ic) = blockindex
263 enddo
264 numofblock = blockindex
265
267 sectorcachesize0, sectorcachesize1 )
268
269 isfirst = .false.
270 endif
271 ! <<< added for turning
272
273 !call start_collection("loopInPrecond33")
274
275 !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
276 !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
277
278 !$omp parallel default(none) &
279 !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
280 !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
281 !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
282 !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
283
284 !C-- FORWARD
285 do ic=1,ncolor
286 !$omp do schedule (static, 1)
287 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
288 do i = blockindextocolorindex(blockindex-1)+1, &
289 blockindextocolorindex(blockindex)
290 iold = perm(i)
291 sw(1)= zp(2*iold-1)
292 sw(2)= zp(2*iold-0)
293 isl= indexl(i-1)+1
294 iel= indexl(i)
295 do j= isl, iel
296 k= iteml(j)
297 x(1)= zp(2*k-1)
298 x(2)= zp(2*k-0)
299 sw(1)= sw(1) - al(4*j-3)*x(1) - al(4*j-2)*x(2)
300 sw(2)= sw(2) - al(4*j-1)*x(1) - al(4*j-0)*x(2)
301 enddo ! j
302
303 if (ncontact.ne.0) then
304 isl= indexcl(i-1)+1
305 iel= indexcl(i)
306 do j= isl, iel
307 k= itemcl(j)
308 x(1)= zp(2*k-1)
309 x(2)= zp(2*k-0)
310 sw(1)= sw(1) - cal(4*j-3)*x(1) - cal(4*j-2)*x(2)
311 sw(2)= sw(2) - cal(4*j-1)*x(1) - cal(4*j-0)*x(2)
312 enddo ! j
313 endif
314
315 x = sw
316 x(2)= x(2) - alu(4*i-1)*x(1)
317 x(2)= alu(4*i )* x(2)
318 x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2))
319 zp(2*iold-1)= x(1)
320 zp(2*iold-0)= x(2)
321 enddo ! i
322 enddo ! blockIndex
323 !$omp end do
324 enddo ! ic
325
326 !C-- BACKWARD
327 do ic=ncolor, 1, -1
328 !$omp do schedule (static, 1)
329 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
330 do i = blockindextocolorindex(blockindex), &
331 blockindextocolorindex(blockindex-1)+1, -1
332 ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
333 ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
334 ! blockIndexToColorIndex(blockIndex)
335 ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
336 sw= 0.d0
337 isu= indexu(i-1) + 1
338 ieu= indexu(i)
339 do j= ieu, isu, -1
340 k= itemu(j)
341 x(1)= zp(2*k-1)
342 x(2)= zp(2*k-0)
343 sw(1)= sw(1) + au(4*j-3)*x(1) + au(4*j-2)*x(2)
344 sw(2)= sw(2) + au(4*j-1)*x(1) + au(4*j-0)*x(2)
345 enddo ! j
346
347 if (ncontact.gt.0) then
348 isu= indexcu(i-1) + 1
349 ieu= indexcu(i)
350 do j= ieu, isu, -1
351 k= itemcu(j)
352 x(1)= zp(2*k-1)
353 x(2)= zp(2*k-0)
354 sw(1)= sw(1) + cau(4*j-3)*x(1) + cau(4*j-2)*x(2)
355 sw(2)= sw(2) + cau(4*j-1)*x(1) + cau(4*j-0)*x(2)
356 enddo ! j
357 endif
358
359 x = sw
360 x(2)= x(2) - alu(4*i-1)*x(1)
361
362
363 x(2)= alu(4*i )* x(2)
364 x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2) )
365
366 iold = perm(i)
367 zp(2*iold-1)= zp(2*iold-1) - x(1)
368 zp(2*iold )= zp(2*iold ) - x(2)
369 enddo ! i
370 enddo ! blockIndex
371 !$omp end do
372 enddo ! ic
373 !$omp end parallel
374
375 !OCL END_CACHE_SUBSECTOR
376 !OCL END_CACHE_SECTOR_SIZE
377
378 !call stop_collection("loopInPrecond33")
379
380 end subroutine hecmw_precond_ssor_22_apply
381
383 implicit none
384 type(hecmwst_matrix), intent(inout) :: hecmat
385 integer(kind=kint ) :: nthreads = 1
386 !$ nthreads = omp_get_max_threads()
387 if (associated(colorindex)) deallocate(colorindex)
388 if (associated(perm)) deallocate(perm)
389 if (associated(iperm)) deallocate(iperm)
390 if (associated(alu)) deallocate(alu)
391 if (nthreads >= 1) then
392 if (associated(d)) deallocate(d)
393 if (associated(al)) deallocate(al)
394 if (associated(au)) deallocate(au)
395 if (associated(indexl)) deallocate(indexl)
396 if (associated(indexu)) deallocate(indexu)
397 if (associated(iteml)) deallocate(iteml)
398 if (associated(itemu)) deallocate(itemu)
399 if (ncontact.ne.0) then
400 if (associated(cal)) deallocate(cal)
401 if (associated(cau)) deallocate(cau)
402 if (associated(indexcl)) deallocate(indexcl)
403 if (associated(indexcu)) deallocate(indexcu)
404 if (associated(itemcl)) deallocate(itemcl)
405 if (associated(itemcu)) deallocate(itemcu)
406 end if
407 end if
408 nullify(colorindex)
409 nullify(perm)
410 nullify(iperm)
411 nullify(alu)
412 nullify(d)
413 nullify(al)
414 nullify(au)
415 nullify(indexl)
416 nullify(indexu)
417 nullify(iteml)
418 nullify(itemu)
419 if (ncontact.ne.0) then
420 nullify(cal)
421 nullify(cau)
422 nullify(indexcl)
423 nullify(indexcu)
424 nullify(itemcl)
425 nullify(itemcu)
426 call hecmw_cmat_lu_free( hecmat )
427 endif
428 ncontact = 0
429 initialized = .false.
430 end subroutine hecmw_precond_ssor_22_clear
431
432 subroutine write_debug_info
433 implicit none
434 integer(kind=kint) :: my_rank, ic, in
435 my_rank = hecmw_comm_get_rank()
436 !--------------------> debug: shizawa
437 if (my_rank.eq.0) then
438 write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
439 endif
440 write(19000+my_rank,'(a)') '#NCOLORTot'
441 write(19000+my_rank,*) ncolor
442 write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
443 do ic=1,ncolor
444 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
445 enddo ! ic
446 write(29000+my_rank,'(a)') '#n_node'
447 write(29000+my_rank,*) n
448 write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
449 do in=1,n
450 write(29000+my_rank,*) in, iperm(in), perm(in)
451 if (perm(iperm(in)) .ne. in) then
452 write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
453 endif
454 enddo
455 end subroutine write_debug_info
456
457 subroutine check_ordering
458 implicit none
459 integer(kind=kint) :: ic, i, j, k
460 integer(kind=kint), allocatable :: iicolor(:)
461 ! check color dependence of neighbouring nodes
462 if (ncolor.gt.1) then
463 allocate(iicolor(n))
464 do ic=1,ncolor
465 do i= colorindex(ic-1)+1, colorindex(ic)
466 iicolor(i) = ic
467 enddo ! i
468 enddo ! ic
469 ! FORWARD: L-part
470 do ic=1,ncolor
471 do i= colorindex(ic-1)+1, colorindex(ic)
472 do j= indexl(i-1)+1, indexl(i)
473 k= iteml(j)
474 if (iicolor(i).eq.iicolor(k)) then
475 write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
476 endif
477 enddo ! j
478 enddo ! i
479 enddo ! ic
480 ! BACKWARD: U-part
481 do ic=ncolor, 1, -1
482 do i= colorindex(ic), colorindex(ic-1)+1, -1
483 do j= indexu(i-1)+1, indexu(i)
484 k= itemu(j)
485 if (iicolor(i).eq.iicolor(k)) then
486 write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
487 endif
488 enddo ! j
489 enddo ! i
490 enddo ! ic
491 deallocate(iicolor)
492 endif ! if (NColor.gt.1)
493 !--------------------< debug: shizawa
494 end subroutine check_ordering
495
496end module hecmw_precond_ssor_22
subroutine, public hecmw_cmat_lu(hecmat)
subroutine, public hecmw_cmat_lu_free(hecmat)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecmat)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecmat)
subroutine, public hecmw_matrix_reorder_renum_item(n, perm, indexxp, itemxp)
subroutine, public hecmw_matrix_reorder_values(n, ndof, perm, iperm, indexl, indexu, iteml, itemu, al, au, d, indexlp, indexup, itemlp, itemup, alp, aup, dp)
subroutine, public hecmw_matrix_reorder_profile(n, perm, iperm, indexl, indexu, iteml, itemu, indexlp, indexup, itemlp, itemup)
subroutine, public hecmw_precond_ssor_22_clear(hecmat)
subroutine, public hecmw_precond_ssor_22_setup(hecmat)
subroutine, public hecmw_precond_ssor_22_apply(zp)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(n, indexl, iteml, indexu, itemu, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(n, indexl, iteml, indexu, itemu, perm_cur, ncolor_in, ncolor_out, colorindex, perm, iperm)