FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_precond_SSOR_66.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
6!C
7!C***
8!C*** module hecmw_precond_SSOR_66
9!C***
10!C
12 use hecmw_util
18 !$ use omp_lib
19
20 private
21
25
26 integer(kind=kint) :: N
27 real(kind=kreal), pointer :: d(:) => null()
28 real(kind=kreal), pointer :: al(:) => null()
29 real(kind=kreal), pointer :: au(:) => null()
30 integer(kind=kint), pointer :: indexL(:) => null()
31 integer(kind=kint), pointer :: indexU(:) => null()
32 integer(kind=kint), pointer :: itemL(:) => null()
33 integer(kind=kint), pointer :: itemU(:) => null()
34 real(kind=kreal), pointer :: alu(:) => null()
35
36 integer(kind=kint) :: NContact = 0
37 real(kind=kreal), pointer :: cal(:) => null()
38 real(kind=kreal), pointer :: cau(:) => null()
39 integer(kind=kint), pointer :: indexCL(:) => null()
40 integer(kind=kint), pointer :: indexCU(:) => null()
41 integer(kind=kint), pointer :: itemCL(:) => null()
42 integer(kind=kint), pointer :: itemCU(:) => null()
43
44 integer(kind=kint) :: NColor
45 integer(kind=kint), pointer :: COLORindex(:) => null()
46 integer(kind=kint), pointer :: perm(:) => null()
47 integer(kind=kint), pointer :: iperm(:) => null()
48
49 logical, save :: isFirst = .true.
50
51contains
52
53 subroutine hecmw_precond_ssor_66_setup(hecMAT)
54 implicit none
55 type(hecmwst_matrix), intent(in) :: hecmat
56 integer(kind=kint ) :: npl, npu, npcl, npcu
57 real (kind=kreal), allocatable :: cd(:)
58 integer(kind=kint ) :: ncolor_in
59 real (kind=kreal) :: sigma_diag
60 real (kind=kreal) :: alutmp(6,6), pw(6)
61 integer(kind=kint ) :: ii, i, j, k
62 integer(kind=kint ) :: nthreads = 1
63 integer(kind=kint ), allocatable :: perm_tmp(:)
64 !real (kind=kreal) :: t0
65
66 !t0 = hecmw_Wtime()
67 !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
68
69 !$ nthreads = omp_get_max_threads()
70
71 n = hecmat%N
72 ! N = hecMAT%NP
73 ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
74 sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
75 ncontact = hecmat%cmat%n_val
76
77 if (ncontact.gt.0) then
78 call hecmw_cmat_lu( hecmat )
79 endif
80
81 if (nthreads == 1) then
82 ncolor = 1
83 allocate(colorindex(0:1), perm(n), iperm(n))
84 colorindex(0) = 0
85 colorindex(1) = n
86 do i=1,n
87 perm(i) = i
88 iperm(i) = i
89 end do
90
91 d => hecmat%D
92 al => hecmat%AL
93 au => hecmat%AU
94 indexl => hecmat%indexL
95 indexu => hecmat%indexU
96 iteml => hecmat%itemL
97 itemu => hecmat%itemU
98 if (ncontact.gt.0) then
99 cal => hecmat%CAL
100 cau => hecmat%CAU
101 indexcl => hecmat%indexCL
102 indexcu => hecmat%indexCU
103 itemcl => hecmat%itemCL
104 itemcu => hecmat%itemCU
105 end if
106 else
107 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
108 call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
109 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
110 !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
111 call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
112 hecmat%indexU, hecmat%itemU, perm_tmp, &
113 ncolor_in, ncolor, colorindex, perm, iperm)
114 !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
115 deallocate(perm_tmp)
116
117 !call write_debug_info
118
119 npl = hecmat%indexL(n)
120 npu = hecmat%indexU(n)
121 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
122 call hecmw_matrix_reorder_profile(n, perm, iperm, &
123 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
124 indexl, indexu, iteml, itemu)
125 !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
126
127 call check_ordering
128
129 allocate(d(36*n), al(36*npl), au(36*npu))
130 call hecmw_matrix_reorder_values(n, 6, perm, iperm, &
131 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
132 hecmat%AL, hecmat%AU, hecmat%D, &
133 indexl, indexu, iteml, itemu, al, au, d)
134 !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
135
136 call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
137 call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
138
139 if (ncontact.gt.0) then
140 npcl = hecmat%indexCL(n)
141 npcu = hecmat%indexCU(n)
142 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
143 call hecmw_matrix_reorder_profile(n, perm, iperm, &
144 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
145 indexcl, indexcu, itemcl, itemcu)
146
147 allocate(cd(36*n), cal(36*npcl), cau(36*npcu))
148 call hecmw_matrix_reorder_values(n, 6, perm, iperm, &
149 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
150 hecmat%CAL, hecmat%CAU, hecmat%D, &
151 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
152 deallocate(cd)
153
154 call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
155 call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
156 end if
157
158 end if
159
160 allocate(alu(36*n))
161 alu = 0.d0
162
163 do ii= 1, 36*n
164 alu(ii) = d(ii)
165 enddo
166
167 ! if (NContact.gt.0) then
168 ! do k= 1, hecMAT%cmat%n_val
169 ! if (hecMAT%cmat%pair(k)%i.ne.hecMAT%cmat%pair(k)%j) cycle
170 ! ii = iperm( hecMAT%cmat%pair(k)%i )
171 ! ALU(9*ii-8) = ALU(9*ii-8) + hecMAT%cmat%pair(k)%val(1, 1)
172 ! ALU(9*ii-7) = ALU(9*ii-7) + hecMAT%cmat%pair(k)%val(1, 2)
173 ! ALU(9*ii-6) = ALU(9*ii-6) + hecMAT%cmat%pair(k)%val(1, 3)
174 ! ALU(9*ii-5) = ALU(9*ii-5) + hecMAT%cmat%pair(k)%val(2, 1)
175 ! ALU(9*ii-4) = ALU(9*ii-4) + hecMAT%cmat%pair(k)%val(2, 2)
176 ! ALU(9*ii-3) = ALU(9*ii-3) + hecMAT%cmat%pair(k)%val(2, 3)
177 ! ALU(9*ii-2) = ALU(9*ii-2) + hecMAT%cmat%pair(k)%val(3, 1)
178 ! ALU(9*ii-1) = ALU(9*ii-1) + hecMAT%cmat%pair(k)%val(3, 2)
179 ! ALU(9*ii ) = ALU(9*ii ) + hecMAT%cmat%pair(k)%val(3, 3)
180 ! enddo
181 ! endif
182
183 do ii= 1, n
184 alutmp(1,1)= alu(36*ii-35) * sigma_diag
185 alutmp(1,2)= alu(36*ii-34)
186 alutmp(1,3)= alu(36*ii-33)
187 alutmp(1,4)= alu(36*ii-32)
188 alutmp(1,5)= alu(36*ii-31)
189 alutmp(1,6)= alu(36*ii-30)
190
191 alutmp(2,1)= alu(36*ii-29)
192 alutmp(2,2)= alu(36*ii-28) * sigma_diag
193 alutmp(2,3)= alu(36*ii-27)
194 alutmp(2,4)= alu(36*ii-26)
195 alutmp(2,5)= alu(36*ii-25)
196 alutmp(2,6)= alu(36*ii-24)
197
198 alutmp(3,1)= alu(36*ii-23)
199 alutmp(3,2)= alu(36*ii-22)
200 alutmp(3,3)= alu(36*ii-21) * sigma_diag
201 alutmp(3,4)= alu(36*ii-20)
202 alutmp(3,5)= alu(36*ii-19)
203 alutmp(3,6)= alu(36*ii-18)
204
205 alutmp(4,1)= alu(36*ii-17)
206 alutmp(4,2)= alu(36*ii-16)
207 alutmp(4,3)= alu(36*ii-15)
208 alutmp(4,4)= alu(36*ii-14) * sigma_diag
209 alutmp(4,5)= alu(36*ii-13)
210 alutmp(4,6)= alu(36*ii-12)
211
212 alutmp(5,1)= alu(36*ii-11)
213 alutmp(5,2)= alu(36*ii-10)
214 alutmp(5,3)= alu(36*ii-9 )
215 alutmp(5,4)= alu(36*ii-8 )
216 alutmp(5,5)= alu(36*ii-7 ) * sigma_diag
217 alutmp(5,6)= alu(36*ii-6 )
218
219 alutmp(6,1)= alu(36*ii-5 )
220 alutmp(6,2)= alu(36*ii-4 )
221 alutmp(6,3)= alu(36*ii-3 )
222 alutmp(6,4)= alu(36*ii-2 )
223 alutmp(6,5)= alu(36*ii-1 )
224 alutmp(6,6)= alu(36*ii ) * sigma_diag
225
226 do k= 1, 6
227 alutmp(k,k)= 1.d0/alutmp(k,k)
228 do i= k+1, 6
229 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
230 do j= k+1, 6
231 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
232 enddo
233 do j= k+1, 6
234 alutmp(i,j)= pw(j)
235 enddo
236 enddo
237 enddo
238
239 alu(36*ii-35)= alutmp(1,1)
240 alu(36*ii-34)= alutmp(1,2)
241 alu(36*ii-33)= alutmp(1,3)
242 alu(36*ii-32)= alutmp(1,4)
243 alu(36*ii-31)= alutmp(1,5)
244 alu(36*ii-30)= alutmp(1,6)
245 alu(36*ii-29)= alutmp(2,1)
246 alu(36*ii-28)= alutmp(2,2)
247 alu(36*ii-27)= alutmp(2,3)
248 alu(36*ii-26)= alutmp(2,4)
249 alu(36*ii-25)= alutmp(2,5)
250 alu(36*ii-24)= alutmp(2,6)
251 alu(36*ii-23)= alutmp(3,1)
252 alu(36*ii-22)= alutmp(3,2)
253 alu(36*ii-21)= alutmp(3,3)
254 alu(36*ii-20)= alutmp(3,4)
255 alu(36*ii-19)= alutmp(3,5)
256 alu(36*ii-18)= alutmp(3,6)
257 alu(36*ii-17)= alutmp(4,1)
258 alu(36*ii-16)= alutmp(4,2)
259 alu(36*ii-15)= alutmp(4,3)
260 alu(36*ii-14)= alutmp(4,4)
261 alu(36*ii-13)= alutmp(4,5)
262 alu(36*ii-12)= alutmp(4,6)
263 alu(36*ii-11)= alutmp(5,1)
264 alu(36*ii-10)= alutmp(5,2)
265 alu(36*ii-9 )= alutmp(5,3)
266 alu(36*ii-8 )= alutmp(5,4)
267 alu(36*ii-7 )= alutmp(5,5)
268 alu(36*ii-6 )= alutmp(5,6)
269 alu(36*ii-5 )= alutmp(6,1)
270 alu(36*ii-4 )= alutmp(6,2)
271 alu(36*ii-3 )= alutmp(6,3)
272 alu(36*ii-2 )= alutmp(6,4)
273 alu(36*ii-1 )= alutmp(6,5)
274 alu(36*ii )= alutmp(6,6)
275 enddo
276
277 isfirst = .true.
278
279 !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
280
281 end subroutine hecmw_precond_ssor_66_setup
282
285 implicit none
286 real(kind=kreal), intent(inout) :: zp(:)
287 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
288 real(kind=kreal) :: x1, x2, x3, x4, x5, x6
289 real(kind=kreal) :: sw1, sw2, sw3, sw4, sw5, sw6
290
291 ! added for turning >>>
292 integer(kind=kint), parameter :: numofblockperthread = 100
293 integer(kind=kint), save :: numofthread = 1, numofblock
294 integer(kind=kint), save, allocatable :: ictoblockindex(:)
295 integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
296 integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
297 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
298 real(kind=kreal) :: numofelementperblock
299 integer(kind=kint) :: my_rank
300
301 if (isfirst) then
302 !$ numOfThread = omp_get_max_threads()
303 numofblock = numofthread * numofblockperthread
304 if (allocated(ictoblockindex)) deallocate(ictoblockindex)
305 if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
306 allocate (ictoblockindex(0:ncolor), &
307 blockindextocolorindex(0:numofblock + ncolor))
308 numofelement = n + indexl(n) + indexu(n)
309 numofelementperblock = dble(numofelement) / numofblock
310 blockindex = 0
311 ictoblockindex = -1
312 ictoblockindex(0) = 0
313 blockindextocolorindex = -1
314 blockindextocolorindex(0) = 0
315 my_rank = hecmw_comm_get_rank()
316 ! write(9000+my_rank,*) &
317 ! '# numOfElementPerBlock =', numOfElementPerBlock
318 ! write(9000+my_rank,*) &
319 ! '# ic, blockIndex, colorIndex, elementCount'
320 do ic = 1, ncolor
321 elementcount = 0
322 ii = 1
323 do i = colorindex(ic-1)+1, colorindex(ic)
324 elementcount = elementcount + 1
325 elementcount = elementcount + (indexl(i) - indexl(i-1))
326 elementcount = elementcount + (indexu(i) - indexu(i-1))
327 if (elementcount > ii * numofelementperblock &
328 .or. i == colorindex(ic)) then
329 ii = ii + 1
330 blockindex = blockindex + 1
331 blockindextocolorindex(blockindex) = i
332 ! write(9000+my_rank,*) ic, blockIndex, &
333 ! blockIndexToColorIndex(blockIndex), elementCount
334 endif
335 enddo
336 ictoblockindex(ic) = blockindex
337 enddo
338 numofblock = blockindex
339
341 sectorcachesize0, sectorcachesize1 )
342
343 isfirst = .false.
344 endif
345 ! <<< added for turning
346
347 !call start_collection("loopInPrecond66")
348
349 !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
350 !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
351
352 !$omp parallel default(none) &
353 !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
354 !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
355 !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
356 !$omp&private(SW1,SW2,SW3,SW4,SW5,SW6,X1,X2,X3,X4,X5,X6,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
357
358 !C-- FORWARD
359 do ic=1,ncolor
360 !$omp do schedule (static, 1)
361 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
362 do i = blockindextocolorindex(blockindex-1)+1, &
363 blockindextocolorindex(blockindex)
364 ! do i = startPos(threadNum, ic), endPos(threadNum, ic)
365 iold = perm(i)
366 sw1= zp(6*iold-5)
367 sw2= zp(6*iold-4)
368 sw3= zp(6*iold-3)
369 sw4= zp(6*iold-2)
370 sw5= zp(6*iold-1)
371 sw6= zp(6*iold )
372 isl= indexl(i-1)+1
373 iel= indexl(i)
374 do j= isl, iel
375 !k= perm(itemL(j))
376 k= iteml(j)
377 x1= zp(6*k-5)
378 x2= zp(6*k-4)
379 x3= zp(6*k-3)
380 x4= zp(6*k-2)
381 x5= zp(6*k-1)
382 x6= zp(6*k )
383 sw1= sw1 -al(36*j-35)*x1 -al(36*j-34)*x2 -al(36*j-33)*x3 -al(36*j-32)*x4 -al(36*j-31)*x5 -al(36*j-30)*x6
384 sw2= sw2 -al(36*j-29)*x1 -al(36*j-28)*x2 -al(36*j-27)*x3 -al(36*j-26)*x4 -al(36*j-25)*x5 -al(36*j-24)*x6
385 sw3= sw3 -al(36*j-23)*x1 -al(36*j-22)*x2 -al(36*j-21)*x3 -al(36*j-20)*x4 -al(36*j-19)*x5 -al(36*j-18)*x6
386 sw4= sw4 -al(36*j-17)*x1 -al(36*j-16)*x2 -al(36*j-15)*x3 -al(36*j-14)*x4 -al(36*j-13)*x5 -al(36*j-12)*x6
387 sw5= sw5 -al(36*j-11)*x1 -al(36*j-10)*x2 -al(36*j-9 )*x3 -al(36*j-8 )*x4 -al(36*j-7 )*x5 -al(36*j-6 )*x6
388 sw6= sw6 -al(36*j-5 )*x1 -al(36*j-4 )*x2 -al(36*j-3 )*x3 -al(36*j-2 )*x4 -al(36*j-1 )*x5 -al(36*j )*x6
389 enddo ! j
390
391 if (ncontact.ne.0) then
392 isl= indexcl(i-1)+1
393 iel= indexcl(i)
394 do j= isl, iel
395 !k= perm(itemCL(j))
396 k= itemcl(j)
397 x1= zp(6*k-5)
398 x2= zp(6*k-4)
399 x3= zp(6*k-3)
400 x4= zp(6*k-2)
401 x5= zp(6*k-1)
402 x6= zp(6*k )
403 sw1= sw1 -cal(36*j-35)*x1 -cal(36*j-34)*x2 -cal(36*j-33)*x3 -cal(36*j-32)*x4 -cal(36*j-31)*x5 -cal(36*j-30)*x6
404 sw2= sw2 -cal(36*j-29)*x1 -cal(36*j-28)*x2 -cal(36*j-27)*x3 -cal(36*j-26)*x4 -cal(36*j-25)*x5 -cal(36*j-24)*x6
405 sw3= sw3 -cal(36*j-23)*x1 -cal(36*j-22)*x2 -cal(36*j-21)*x3 -cal(36*j-20)*x4 -cal(36*j-19)*x5 -cal(36*j-18)*x6
406 sw4= sw4 -cal(36*j-17)*x1 -cal(36*j-16)*x2 -cal(36*j-15)*x3 -cal(36*j-14)*x4 -cal(36*j-13)*x5 -cal(36*j-12)*x6
407 sw5= sw5 -cal(36*j-11)*x1 -cal(36*j-10)*x2 -cal(36*j-9 )*x3 -cal(36*j-8 )*x4 -cal(36*j-7 )*x5 -cal(36*j-6 )*x6
408 sw6= sw6 -cal(36*j-5 )*x1 -cal(36*j-4 )*x2 -cal(36*j-3 )*x3 -cal(36*j-2 )*x4 -cal(36*j-1 )*x5 -cal(36*j )*x6
409 enddo ! j
410 endif
411
412 x1= sw1
413 x2= sw2
414 x3= sw3
415 x4= sw4
416 x5= sw5
417 x6= sw6
418 x2= x2 -alu(36*i-29)*x1
419 x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
420 x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
421 x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
422 x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
423 x6= alu(36*i )* x6
424 x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
425 x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
426 x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
427 x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
428 x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
429 zp(6*iold-5)= x1
430 zp(6*iold-4)= x2
431 zp(6*iold-3)= x3
432 zp(6*iold-2)= x4
433 zp(6*iold-1)= x5
434 zp(6*iold )= x6
435 enddo ! i
436 enddo ! blockIndex
437 !$omp end do
438 enddo ! ic
439
440 !C-- BACKWARD
441 do ic=ncolor, 1, -1
442 !$omp do schedule (static, 1)
443 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
444 do i = blockindextocolorindex(blockindex), &
445 blockindextocolorindex(blockindex-1)+1, -1
446 ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
447 ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
448 ! blockIndexToColorIndex(blockIndex)
449 ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
450 sw1= 0.d0
451 sw2= 0.d0
452 sw3= 0.d0
453 sw4= 0.d0
454 sw5= 0.d0
455 sw6= 0.d0
456 isu= indexu(i-1) + 1
457 ieu= indexu(i)
458 do j= ieu, isu, -1
459 !k= perm(itemU(j))
460 k= itemu(j)
461 x1= zp(6*k-5)
462 x2= zp(6*k-4)
463 x3= zp(6*k-3)
464 x4= zp(6*k-2)
465 x5= zp(6*k-1)
466 x6= zp(6*k )
467 sw1= sw1 +au(36*j-35)*x1 +au(36*j-34)*x2 +au(36*j-33)*x3 +au(36*j-32)*x4 +au(36*j-31)*x5 +au(36*j-30)*x6
468 sw2= sw2 +au(36*j-29)*x1 +au(36*j-28)*x2 +au(36*j-27)*x3 +au(36*j-26)*x4 +au(36*j-25)*x5 +au(36*j-24)*x6
469 sw3= sw3 +au(36*j-23)*x1 +au(36*j-22)*x2 +au(36*j-21)*x3 +au(36*j-20)*x4 +au(36*j-19)*x5 +au(36*j-18)*x6
470 sw4= sw4 +au(36*j-17)*x1 +au(36*j-16)*x2 +au(36*j-15)*x3 +au(36*j-14)*x4 +au(36*j-13)*x5 +au(36*j-12)*x6
471 sw5= sw5 +au(36*j-11)*x1 +au(36*j-10)*x2 +au(36*j-9 )*x3 +au(36*j-8 )*x4 +au(36*j-7 )*x5 +au(36*j-6 )*x6
472 sw6= sw6 +au(36*j-5 )*x1 +au(36*j-4 )*x2 +au(36*j-3 )*x3 +au(36*j-2 )*x4 +au(36*j-1 )*x5 +au(36*j )*x6
473 enddo ! j
474
475 if (ncontact.gt.0) then
476 isu= indexcu(i-1) + 1
477 ieu= indexcu(i)
478 do j= ieu, isu, -1
479 !k= perm(itemCU(j))
480 k= itemcu(j)
481 x1= zp(6*k-5)
482 x2= zp(6*k-4)
483 x3= zp(6*k-3)
484 x4= zp(6*k-2)
485 x5= zp(6*k-1)
486 x6= zp(6*k )
487 sw1= sw1 +cau(36*j-35)*x1 +cau(36*j-34)*x2 +cau(36*j-33)*x3 +cau(36*j-32)*x4 +cau(36*j-31)*x5 +cau(36*j-30)*x6
488 sw2= sw2 +cau(36*j-29)*x1 +cau(36*j-28)*x2 +cau(36*j-27)*x3 +cau(36*j-26)*x4 +cau(36*j-25)*x5 +cau(36*j-24)*x6
489 sw3= sw3 +cau(36*j-23)*x1 +cau(36*j-22)*x2 +cau(36*j-21)*x3 +cau(36*j-20)*x4 +cau(36*j-19)*x5 +cau(36*j-18)*x6
490 sw4= sw4 +cau(36*j-17)*x1 +cau(36*j-16)*x2 +cau(36*j-15)*x3 +cau(36*j-14)*x4 +cau(36*j-13)*x5 +cau(36*j-12)*x6
491 sw5= sw5 +cau(36*j-11)*x1 +cau(36*j-10)*x2 +cau(36*j-9 )*x3 +cau(36*j-8 )*x4 +cau(36*j-7 )*x5 +cau(36*j-6 )*x6
492 sw6= sw6 +cau(36*j-5 )*x1 +cau(36*j-4 )*x2 +cau(36*j-3 )*x3 +cau(36*j-2 )*x4 +cau(36*j-1 )*x5 +cau(36*j )*x6
493 enddo ! j
494 endif
495
496 x1= sw1
497 x2= sw2
498 x3= sw3
499 x4= sw4
500 x5= sw5
501 x6= sw6
502 x2= x2 -alu(36*i-29)*x1
503 x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
504 x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
505 x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
506 x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
507 x6= alu(36*i )* x6
508 x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
509 x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
510 x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
511 x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
512 x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
513 iold = perm(i)
514 zp(6*iold-5)= zp(6*iold-5) -x1
515 zp(6*iold-4)= zp(6*iold-4) -x2
516 zp(6*iold-3)= zp(6*iold-3) -x3
517 zp(6*iold-2)= zp(6*iold-2) -x4
518 zp(6*iold-1)= zp(6*iold-1) -x5
519 zp(6*iold )= zp(6*iold ) -x6
520 enddo ! i
521 enddo ! blockIndex
522 !$omp end do
523 enddo ! ic
524 !$omp end parallel
525
526 !OCL END_CACHE_SUBSECTOR
527 !OCL END_CACHE_SECTOR_SIZE
528
529 !call stop_collection("loopInPrecond66")
530
531 end subroutine hecmw_precond_ssor_66_apply
532
534 implicit none
535 type(hecmwst_matrix), intent(inout) :: hecmat
536 integer(kind=kint ) :: nthreads = 1
537 !$ nthreads = omp_get_max_threads()
538 if (associated(colorindex)) deallocate(colorindex)
539 if (associated(perm)) deallocate(perm)
540 if (associated(iperm)) deallocate(iperm)
541 if (associated(alu)) deallocate(alu)
542 if (nthreads >= 1) then
543 if (associated(d)) deallocate(d)
544 if (associated(al)) deallocate(al)
545 if (associated(au)) deallocate(au)
546 if (associated(indexl)) deallocate(indexl)
547 if (associated(indexu)) deallocate(indexu)
548 if (associated(iteml)) deallocate(iteml)
549 if (associated(itemu)) deallocate(itemu)
550 if (ncontact.ne.0) then
551 if (associated(cal)) deallocate(cal)
552 if (associated(cau)) deallocate(cau)
553 if (associated(indexcl)) deallocate(indexcl)
554 if (associated(indexcu)) deallocate(indexcu)
555 if (associated(itemcl)) deallocate(itemcl)
556 if (associated(itemcu)) deallocate(itemcu)
557 end if
558 end if
559 nullify(colorindex)
560 nullify(perm)
561 nullify(iperm)
562 nullify(alu)
563 nullify(d)
564 nullify(al)
565 nullify(au)
566 nullify(indexl)
567 nullify(indexu)
568 nullify(iteml)
569 nullify(itemu)
570 if (ncontact.ne.0) then
571 nullify(cal)
572 nullify(cau)
573 nullify(indexcl)
574 nullify(indexcu)
575 nullify(itemcl)
576 nullify(itemcu)
577 call hecmw_cmat_lu_free( hecmat )
578 endif
579 ncontact = 0
580 end subroutine hecmw_precond_ssor_66_clear
581
582 subroutine write_debug_info
583 implicit none
584 integer(kind=kint) :: my_rank, ic, in
585 my_rank = hecmw_comm_get_rank()
586 !--------------------> debug: shizawa
587 if (my_rank.eq.0) then
588 write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
589 endif
590 write(19000+my_rank,'(a)') '#NCOLORTot'
591 write(19000+my_rank,*) ncolor
592 write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
593 do ic=1,ncolor
594 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
595 enddo ! ic
596 write(29000+my_rank,'(a)') '#n_node'
597 write(29000+my_rank,*) n
598 write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
599 do in=1,n
600 write(29000+my_rank,*) in, iperm(in), perm(in)
601 if (perm(iperm(in)) .ne. in) then
602 write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
603 endif
604 enddo
605 end subroutine write_debug_info
606
607 subroutine check_ordering
608 implicit none
609 integer(kind=kint) :: ic, i, j, k
610 integer(kind=kint), allocatable :: iicolor(:)
611 ! check color dependence of neighbouring nodes
612 if (ncolor.gt.1) then
613 allocate(iicolor(n))
614 do ic=1,ncolor
615 do i= colorindex(ic-1)+1, colorindex(ic)
616 iicolor(i) = ic
617 enddo ! i
618 enddo ! ic
619 ! FORWARD: L-part
620 do ic=1,ncolor
621 do i= colorindex(ic-1)+1, colorindex(ic)
622 do j= indexl(i-1)+1, indexl(i)
623 k= iteml(j)
624 if (iicolor(i).eq.iicolor(k)) then
625 write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
626 endif
627 enddo ! j
628 enddo ! i
629 enddo ! ic
630 ! BACKWARD: U-part
631 do ic=ncolor, 1, -1
632 do i= colorindex(ic), colorindex(ic-1)+1, -1
633 do j= indexu(i-1)+1, indexu(i)
634 k= itemu(j)
635 if (iicolor(i).eq.iicolor(k)) then
636 write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
637 endif
638 enddo ! j
639 enddo ! i
640 enddo ! ic
641 deallocate(iicolor)
642 endif ! if (NColor.gt.1)
643 !--------------------< debug: shizawa
644 end subroutine check_ordering
645
646end module hecmw_precond_ssor_66
subroutine, public hecmw_cmat_lu(hecmat)
subroutine, public hecmw_cmat_lu_free(hecmat)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecmat)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecmat)
subroutine, public hecmw_matrix_reorder_renum_item(n, perm, indexxp, itemxp)
subroutine, public hecmw_matrix_reorder_values(n, ndof, perm, iperm, indexl, indexu, iteml, itemu, al, au, d, indexlp, indexup, itemlp, itemup, alp, aup, dp)
subroutine, public hecmw_matrix_reorder_profile(n, perm, iperm, indexl, indexu, iteml, itemu, indexlp, indexup, itemlp, itemup)
subroutine, public hecmw_precond_ssor_66_apply(zp)
subroutine, public hecmw_precond_ssor_66_clear(hecmat)
subroutine, public hecmw_precond_ssor_66_setup(hecmat)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(n, indexl, iteml, indexu, itemu, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(n, indexl, iteml, indexu, itemu, perm_cur, ncolor_in, ncolor_out, colorindex, perm, iperm)