26 integer(kind=kint) :: N
27 real(kind=
kreal),
pointer :: d(:) => null()
28 real(kind=
kreal),
pointer :: al(:) => null()
29 real(kind=
kreal),
pointer :: au(:) => null()
30 integer(kind=kint),
pointer :: indexL(:) => null()
31 integer(kind=kint),
pointer :: indexU(:) => null()
32 integer(kind=kint),
pointer :: itemL(:) => null()
33 integer(kind=kint),
pointer :: itemU(:) => null()
34 real(kind=
kreal),
pointer :: alu(:) => null()
36 integer(kind=kint) :: NContact = 0
37 real(kind=
kreal),
pointer :: cal(:) => null()
38 real(kind=
kreal),
pointer :: cau(:) => null()
39 integer(kind=kint),
pointer :: indexCL(:) => null()
40 integer(kind=kint),
pointer :: indexCU(:) => null()
41 integer(kind=kint),
pointer :: itemCL(:) => null()
42 integer(kind=kint),
pointer :: itemCU(:) => null()
44 integer(kind=kint) :: NColor
45 integer(kind=kint),
pointer :: COLORindex(:) => null()
46 integer(kind=kint),
pointer :: perm(:) => null()
47 integer(kind=kint),
pointer :: iperm(:) => null()
49 logical,
save :: isFirst = .true.
51 logical,
save :: INITIALIZED = .false.
54 integer(kind=kint),
parameter :: numOfBlockPerThread = 100
55 integer(kind=kint),
save :: numOfThread = 1, numofblock
56 integer(kind=kint),
save,
allocatable :: icToBlockIndex(:)
57 integer(kind=kint),
save,
allocatable :: blockIndexToColorIndex(:)
58 integer(kind=kint),
save :: sectorCacheSize0, sectorCacheSize1
60 integer(kind=kint),
parameter :: DEBUG = 0
67 integer(kind=kint ) :: npl, npu, npcl, npcu
68 real (kind=
kreal),
allocatable :: cd(:)
69 integer(kind=kint ) :: ncolor_in
70 real (kind=
kreal) :: sigma_diag
71 real (kind=
kreal) :: alutmp(3,3), pw(3)
72 integer(kind=kint ) :: ii, i, j, k
73 integer(kind=kint ) :: nthreads = 1
74 integer(kind=kint ),
allocatable :: perm_tmp(:)
75 real (kind=
kreal) :: t0
79 write(*,*)
'DEBUG: SSOR setup start',
hecmw_wtime()-t0
83 if (hecmat%Iarray(98) == 1)
then
85 else if (hecmat%Iarray(97) == 1)
then
98 ncontact = hecmat%cmat%n_val
100 if (ncontact.gt.0)
then
104 if (nthreads == 1)
then
106 allocate(colorindex(0:1), perm(n), iperm(n))
114 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
116 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
117 if (debug >= 1)
write(*,*)
'DEBUG: RCM ordering done',
hecmw_wtime()-t0
119 hecmat%indexU, hecmat%itemU, perm_tmp, &
120 ncolor_in, ncolor, colorindex, perm, iperm)
121 if (debug >= 1)
write(*,*)
'DEBUG: MC ordering done',
hecmw_wtime()-t0
127 npl = hecmat%indexL(n)
128 npu = hecmat%indexU(n)
129 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
131 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
132 indexl, indexu, iteml, itemu)
133 if (debug >= 1)
write(*,*)
'DEBUG: reordering profile done',
hecmw_wtime()-t0
137 allocate(d(9*n), al(9*npl), au(9*npu))
139 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
140 hecmat%AL, hecmat%AU, hecmat%D, &
141 indexl, indexu, iteml, itemu, al, au, d)
142 if (debug >= 1)
write(*,*)
'DEBUG: reordering values done',
hecmw_wtime()-t0
147 if (ncontact.gt.0)
then
148 npcl = hecmat%indexCL(n)
149 npcu = hecmat%indexCU(n)
150 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
152 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
153 indexcl, indexcu, itemcl, itemcu)
155 allocate(cd(9*n), cal(9*npcl), cau(9*npcu))
157 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
158 hecmat%CAL, hecmat%CAU, hecmat%D, &
159 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
173 if (ncontact.gt.0)
then
174 do k= 1, hecmat%cmat%n_val
175 if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
176 ii = iperm( hecmat%cmat%pair(k)%i )
177 alu(9*ii-8) = alu(9*ii-8) + hecmat%cmat%pair(k)%val(1, 1)
178 alu(9*ii-7) = alu(9*ii-7) + hecmat%cmat%pair(k)%val(1, 2)
179 alu(9*ii-6) = alu(9*ii-6) + hecmat%cmat%pair(k)%val(1, 3)
180 alu(9*ii-5) = alu(9*ii-5) + hecmat%cmat%pair(k)%val(2, 1)
181 alu(9*ii-4) = alu(9*ii-4) + hecmat%cmat%pair(k)%val(2, 2)
182 alu(9*ii-3) = alu(9*ii-3) + hecmat%cmat%pair(k)%val(2, 3)
183 alu(9*ii-2) = alu(9*ii-2) + hecmat%cmat%pair(k)%val(3, 1)
184 alu(9*ii-1) = alu(9*ii-1) + hecmat%cmat%pair(k)%val(3, 2)
185 alu(9*ii ) = alu(9*ii ) + hecmat%cmat%pair(k)%val(3, 3)
192 alutmp(1,1)= alu(9*ii-8) * sigma_diag
193 alutmp(1,2)= alu(9*ii-7)
194 alutmp(1,3)= alu(9*ii-6)
195 alutmp(2,1)= alu(9*ii-5)
196 alutmp(2,2)= alu(9*ii-4) * sigma_diag
197 alutmp(2,3)= alu(9*ii-3)
198 alutmp(3,1)= alu(9*ii-2)
199 alutmp(3,2)= alu(9*ii-1)
200 alutmp(3,3)= alu(9*ii ) * sigma_diag
202 alutmp(k,k)= 1.d0/alutmp(k,k)
204 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
206 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
213 alu(9*ii-8)= alutmp(1,1)
214 alu(9*ii-7)= alutmp(1,2)
215 alu(9*ii-6)= alutmp(1,3)
216 alu(9*ii-5)= alutmp(2,1)
217 alu(9*ii-4)= alutmp(2,2)
218 alu(9*ii-3)= alutmp(2,3)
219 alu(9*ii-2)= alutmp(3,1)
220 alu(9*ii-1)= alutmp(3,2)
221 alu(9*ii )= alutmp(3,3)
229 hecmat%Iarray(98) = 0
230 hecmat%Iarray(97) = 0
232 if (debug >= 1)
write(*,*)
'DEBUG: SSOR setup done',
hecmw_wtime()-t0
236 subroutine setup_tuning_parameters
239 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
240 real(kind=
kreal) :: numofelementperblock
241 integer(kind=kint) :: my_rank
242 integer(kind=kint) :: ic, i
243 if (debug >= 1)
write(*,*)
'DEBUG: setting up tuning parameters for SSOR'
245 numofblock = numofthread * numofblockperthread
246 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
247 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
248 allocate (ictoblockindex(0:ncolor), &
249 blockindextocolorindex(0:numofblock + ncolor))
250 numofelement = n + indexl(n) + indexu(n)
251 numofelementperblock = dble(numofelement) / numofblock
254 ictoblockindex(0) = 0
255 blockindextocolorindex = -1
256 blockindextocolorindex(0) = 0
265 do i = colorindex(ic-1)+1, colorindex(ic)
266 elementcount = elementcount + 1
267 elementcount = elementcount + (indexl(i) - indexl(i-1))
268 elementcount = elementcount + (indexu(i) - indexu(i-1))
269 if (elementcount > ii * numofelementperblock &
270 .or. i == colorindex(ic))
then
272 blockindex = blockindex + 1
273 blockindextocolorindex(blockindex) = i
278 ictoblockindex(ic) = blockindex
280 numofblock = blockindex
283 sectorcachesize0, sectorcachesize1 )
284 end subroutine setup_tuning_parameters
288 real(kind=
kreal),
intent(inout) :: zp(:)
289 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
290 real(kind=
kreal) :: sw1, sw2, sw3, x1, x2, x3
293 integer(kind=kint) :: blockindex
296 call setup_tuning_parameters
315 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
316 do i = blockindextocolorindex(blockindex-1)+1, &
317 blockindextocolorindex(blockindex)
331 sw1= sw1 - al(9*j-8)*x1 - al(9*j-7)*x2 - al(9*j-6)*x3
332 sw2= sw2 - al(9*j-5)*x1 - al(9*j-4)*x2 - al(9*j-3)*x3
333 sw3= sw3 - al(9*j-2)*x1 - al(9*j-1)*x2 - al(9*j )*x3
336 if (ncontact.ne.0)
then
345 sw1= sw1 - cal(9*j-8)*x1 - cal(9*j-7)*x2 - cal(9*j-6)*x3
346 sw2= sw2 - cal(9*j-5)*x1 - cal(9*j-4)*x2 - cal(9*j-3)*x3
347 sw3= sw3 - cal(9*j-2)*x1 - cal(9*j-1)*x2 - cal(9*j )*x3
354 x2= x2 - alu(9*i-5)*x1
355 x3= x3 - alu(9*i-2)*x1 - alu(9*i-1)*x2
357 x2= alu(9*i-4)*( x2 - alu(9*i-3)*x3 )
358 x1= alu(9*i-8)*( x1 - alu(9*i-6)*x3 - alu(9*i-7)*x2)
370 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
371 do i = blockindextocolorindex(blockindex), &
372 blockindextocolorindex(blockindex-1)+1, -1
388 sw1= sw1 + au(9*j-8)*x1 + au(9*j-7)*x2 + au(9*j-6)*x3
389 sw2= sw2 + au(9*j-5)*x1 + au(9*j-4)*x2 + au(9*j-3)*x3
390 sw3= sw3 + au(9*j-2)*x1 + au(9*j-1)*x2 + au(9*j )*x3
393 if (ncontact.gt.0)
then
394 isu= indexcu(i-1) + 1
402 sw1= sw1 + cau(9*j-8)*x1 + cau(9*j-7)*x2 + cau(9*j-6)*x3
403 sw2= sw2 + cau(9*j-5)*x1 + cau(9*j-4)*x2 + cau(9*j-3)*x3
404 sw3= sw3 + cau(9*j-2)*x1 + cau(9*j-1)*x2 + cau(9*j )*x3
411 x2= x2 - alu(9*i-5)*x1
412 x3= x3 - alu(9*i-2)*x1 - alu(9*i-1)*x2
414 x2= alu(9*i-4)*( x2 - alu(9*i-3)*x3 )
415 x1= alu(9*i-8)*( x1 - alu(9*i-6)*x3 - alu(9*i-7)*x2)
417 zp(3*iold-2)= zp(3*iold-2) - x1
418 zp(3*iold-1)= zp(3*iold-1) - x2
419 zp(3*iold )= zp(3*iold ) - x3
436 integer(kind=kint ) :: nthreads = 1
438 if (
associated(colorindex))
deallocate(colorindex)
439 if (
associated(perm))
deallocate(perm)
440 if (
associated(iperm))
deallocate(iperm)
441 if (
associated(alu))
deallocate(alu)
442 if (nthreads >= 1)
then
443 if (
associated(d))
deallocate(d)
444 if (
associated(al))
deallocate(al)
445 if (
associated(au))
deallocate(au)
446 if (
associated(indexl))
deallocate(indexl)
447 if (
associated(indexu))
deallocate(indexu)
448 if (
associated(iteml))
deallocate(iteml)
449 if (
associated(itemu))
deallocate(itemu)
450 if (ncontact.ne.0)
then
451 if (
associated(cal))
deallocate(cal)
452 if (
associated(cau))
deallocate(cau)
453 if (
associated(indexcl))
deallocate(indexcl)
454 if (
associated(indexcu))
deallocate(indexcu)
455 if (
associated(itemcl))
deallocate(itemcl)
456 if (
associated(itemcu))
deallocate(itemcu)
470 if (ncontact.ne.0)
then
480 initialized = .false.
483 subroutine write_debug_info
485 integer(kind=kint) :: my_rank, ic, in
488 if (my_rank.eq.0)
then
489 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
491 write(19000+my_rank,
'(a)')
'#NCOLORTot'
492 write(19000+my_rank,*) ncolor
493 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
495 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
497 write(29000+my_rank,
'(a)')
'#n_node'
498 write(29000+my_rank,*) n
499 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
501 write(29000+my_rank,*) in, iperm(in), perm(in)
502 if (perm(iperm(in)) .ne. in)
then
503 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
506 end subroutine write_debug_info
508 subroutine check_ordering
510 integer(kind=kint) :: ic, i, j, k
511 integer(kind=kint),
allocatable :: iicolor(:)
513 if (ncolor.gt.1)
then
516 do i= colorindex(ic-1)+1, colorindex(ic)
522 do i= colorindex(ic-1)+1, colorindex(ic)
523 do j= indexl(i-1)+1, indexl(i)
525 if (iicolor(i).eq.iicolor(k))
then
526 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
533 do i= colorindex(ic), colorindex(ic-1)+1, -1
534 do j= indexu(i-1)+1, indexu(i)
536 if (iicolor(i).eq.iicolor(k))
then
537 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
545 end subroutine check_ordering
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecmat)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecmat)
subroutine, public hecmw_matrix_reorder_renum_item(n, perm, indexxp, itemxp)
subroutine, public hecmw_matrix_reorder_values(n, ndof, perm, iperm, indexl, indexu, iteml, itemu, al, au, d, indexlp, indexup, itemlp, itemup, alp, aup, dp)
subroutine, public hecmw_matrix_reorder_profile(n, perm, iperm, indexl, indexu, iteml, itemu, indexlp, indexup, itemlp, itemup)
subroutine, public hecmw_precond_ssor_33_apply(zp)
subroutine, public hecmw_precond_ssor_33_setup(hecmat)
subroutine, public hecmw_precond_ssor_33_clear(hecmat)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
real(kind=kreal) function hecmw_wtime()
subroutine, public hecmw_matrix_ordering_rcm(n, indexl, iteml, indexu, itemu, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(n, indexl, iteml, indexu, itemu, perm_cur, ncolor_in, ncolor_out, colorindex, perm, iperm)