FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_ClusterMKL_wrapper.F90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
6#ifdef WITH_MKL
7include 'mkl_cluster_sparse_solver.f90'
8#endif
9
11 use hecmw_util
14
15#ifdef WITH_MKL
16 use mkl_cluster_sparse_solver
17#endif
18
19 implicit none
20
21 private ! default
22 public hecmw_clustermkl_wrapper ! only entry point of Parallel Direct Solver is public
23
24 logical, save :: INITIALIZED = .false.
25#ifdef WITH_MKL
26 type(MKL_CLUSTER_SPARSE_SOLVER_HANDLE) :: pt(64)
27#endif
28 integer maxfct, mnum, mtype, nrhs, msglvl
29 integer idum(1), i
30 data nrhs /1/, maxfct /1/, mnum /1/
31
32 integer, save :: iparm(64)
33 integer(kind=kint), save :: nn
34 integer(kind=kint), pointer, save :: irow(:), jcol(:)
35 real(kind=kreal), pointer, save :: aval(:), rhs(:), solx(:)
36
37 integer(kind=kint), parameter :: debug=0
38
39contains
40
41 subroutine hecmw_clustermkl_wrapper(spMAT, phase_start, solution, istat)
42 implicit none
43 type (sparse_matrix), intent(inout) :: spmat
44 integer(kind=kint), intent(in) :: phase_start
45 integer(kind=kint), intent(out) :: istat
46 real(kind=kreal), pointer, intent(inout) :: solution(:)
47
48 integer(kind=kint) :: myrank, phase
49 real(kind=kreal) :: t1,t2,t3,t4,t5
50
51#ifdef WITH_MKL
52
53 myrank=hecmw_comm_get_rank()
54
55 if (.not. initialized) then
56 do i=1,64
57 pt(i)%dummy = 0
58 enddo
59 iparm(:) = 0
60 iparm(1) = 1 ! no solver default
61 iparm(2) = 3 ! fill-in reordering from METIS
62 iparm(3) = 1
63 iparm(8) = 2
64 if (spmat%symtype == sparse_matrix_symtype_asym) then
65 iparm(10) = 13 ! perturbe the pivot elements with 1E-13
66 else
67 iparm(10) = 8 ! perturbe the pivot elements with 1E-8
68 endif
69 iparm(11) = 1 ! Enable scaling
70 iparm(13) = 1 ! Enable matching
71 iparm(18) = 0
72 iparm(19) = 0
73 msglvl = 0 ! print statistical information
74 initialized = .true.
75 else
76 if( phase_start == 1 ) then
77 phase = -1
78 call cluster_sparse_solver ( &
79 pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
80 idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
81 if (istat < 0) then
82 write(*,*) 'ERROR: MKL returned with error in phase -1', istat
83 return
84 endif
85 if( spmat%type == sparse_matrix_type_coo ) deallocate(irow,jcol,aval,rhs,solx)
86 end if
87 endif
88
89 ! Additional setup
90 t1=hecmw_wtime()
91 if ( phase_start == 1 ) then
92 if (spmat%symtype == sparse_matrix_symtype_spd) then
93 mtype = 2
94 else if (spmat%symtype == sparse_matrix_symtype_sym) then
95 mtype = -2
96 else
97 mtype = 11
98 endif
99 endif
100
101 if( spmat%type == sparse_matrix_type_csr ) then
102 iparm(40) = 2 ! Input: distributed matrix/rhs/solution format
103 iparm(41) = spmat%DISPLS(myrank+1)+1
104 iparm(42) = spmat%DISPLS(myrank+1)+spmat%N_COUNTS(myrank+1)
105 call sort_column_ascending_order(spmat,myrank)
106 nn = spmat%N
107 irow => spmat%IRN
108 jcol => spmat%JCN
109 aval => spmat%A
110 rhs => spmat%rhs
111 solx => solution
112 else if( spmat%type == sparse_matrix_type_coo ) then
113 iparm(40) = 0 ! Input: centralized input format( mkl don't have distributed nonsymmetric matrix input...)
114 !input matrix is gathered to rank0 if matrix is given in COO format
115 call export_spmat_to_centralizedcrs(spmat,myrank,nn,irow,jcol,aval,rhs,solx)
116 end if
117
118 t2=hecmw_wtime()
119 if (myrank==0 .and. spmat%timelog > 0) &
120 write(*,'(A,f10.3)') ' [Cluster Pardiso]: Additional Setup completed. time(sec)=',t2-t1
121
122 if ( phase_start == 1 ) then
123 ! ANALYSIS
124 t2=hecmw_wtime()
125
126 phase = 11
127 call cluster_sparse_solver ( &
128 pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
129 idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
130 if (istat < 0) then
131 if (myrank==0 .and. spmat%timelog > 0) call print_iparm_paramters()
132 write(*,*) 'ERROR: MKL returned with error in phase 1', istat
133 return
134 endif
135
136 t3=hecmw_wtime()
137 if (myrank==0 .and. spmat%timelog > 0) &
138 write(*,'(A,f10.3)') ' [Cluster Pardiso]: Analysis completed. time(sec)=',t3-t2
139 endif
140
141 ! FACTORIZATION
142 if ( phase_start .le. 2 ) then
143 t3=hecmw_wtime()
144
145 phase = 22
146 call cluster_sparse_solver ( &
147 pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
148 idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
149 if (istat < 0) then
150 if (myrank==0 .and. spmat%timelog > 0) call print_iparm_paramters()
151 write(*,*) 'ERROR: MKL returned with error in phase 2', istat
152 return
153 endif
154 t4=hecmw_wtime()
155 if (myrank==0 .and. spmat%timelog > 0) &
156 write(*,'(A,f10.3)') ' [Cluster Pardiso]: Factorization completed. time(sec)=',t4-t3
157 endif
158
159 t4=hecmw_wtime()
160
161 ! SOLUTION
162 phase = 33
163 call cluster_sparse_solver ( &
164 pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
165 idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
166 if (istat < 0) then
167 if (myrank==0 .and. spmat%timelog > 0) call print_iparm_paramters()
168 write(*,*) 'ERROR: MKL returned with error in phase 3', istat
169 return
170 endif
171
172 if( spmat%type == sparse_matrix_type_coo ) then !scatter global solution
173 call sparse_matrix_scatter_rhs(spmat, solx)
174 do i=1,spmat%N_loc
175 solution(i) = spmat%rhs(i)
176 end do
177 endif
178
179 ! solution is written to hecMAT%X. you have to edit lib/solve_LINEQ.f90
180 ! to use this routine properly.
181
182 t5=hecmw_wtime()
183 if (myrank==0 .and. spmat%timelog > 0) then
184 write(*,'(A,f10.3)') ' [Cluster Pardiso]: Solution completed. time(sec)=',t5-t4
185 end if
186 if( debug>0 .and. myrank==0 ) call print_iparm_paramters()
187
188#else
189 stop "MKL Pardiso not available"
190#endif
191 end subroutine hecmw_clustermkl_wrapper
192
193#ifdef WITH_MKL
194 subroutine print_iparm_paramters()
195 write(*,'(A60,I8)') 'Number of iterative refinement steps performed: ',iparm(7)
196 write(*,'(A60,I8)') 'Number of perturbed pivots: ',iparm(14)
197 write(*,'(A60,I8)') 'Peak memory on symbolic factorization: ',iparm(15)
198 write(*,'(A60,I8)') 'Permanent memory on symbolic factorization: ',iparm(16)
199 write(*,'(A60,I8)') 'Size of factors/Peak memory on num. fact. and sol: ',iparm(17)
200 write(*,'(A60,I8)') 'The number of non-zero elements in the factors: ',iparm(18)
201 if( mtype < 11 ) then
202 write(*,'(A60,I8)') 'Number of positive eigenvalues: ',iparm(22)
203 write(*,'(A60,I8)') 'Number of negative eigenvalues: ',iparm(23)
204 end if
205 end subroutine
206
207 subroutine export_spmat_to_centralizedcrs(spMAT,myrank,n,ia,ja,a,b,x)
208 type (sparse_matrix), intent(in) :: spmat
209 integer(kind=kint), intent(in) :: myrank
210 integer(kind=kint), intent(out) :: n
211 integer(kind=kint), pointer, intent(inout) :: ia(:), ja(:)
212 real(kind=kreal), pointer, intent(inout) :: a(:), b(:), x(:)
213
214 integer(kind=kint) :: i, nprocs, ierr, nnz
215 integer(kind=kint), allocatable :: dispmat(:), ncounts(:)
216 real(kind=kreal) :: t1,t2,t3,t4
217
218 t1=hecmw_wtime()
219
220 nprocs = hecmw_comm_get_size()
221 allocate(dispmat(nprocs),ncounts(nprocs))
222 if (nprocs > 1) then
223 call hecmw_allgather_int_1(spmat%NZ, ncounts, hecmw_comm_get_comm())
224 endif
225 n = spmat%N
226 nnz = 0
227 dispmat(1)=0
228 do i=1,nprocs-1
229 dispmat(i+1)=dispmat(i)+ncounts(i)
230 nnz = nnz + ncounts(i)
231 enddo
232 nnz = nnz + ncounts(nprocs)
233
234 if( myrank == 0 ) then
235 ierr = 0
236 if( .not. associated(ia) ) allocate(ia(nnz+n), stat=ierr)
237 if (ierr /= 0) then
238 write(*,*) " Allocation error in Setup Cluster MKL, ia",ierr,nnz
240 endif
241 if( .not. associated(ja) ) allocate(ja(nnz+n), stat=ierr)
242 if (ierr /= 0) then
243 write(*,*) " Allocation error in Setup Cluster MKL, ja",ierr,nnz
245 endif
246 if( .not. associated(a) ) allocate(a(nnz+n), stat=ierr)
247 if (ierr /= 0) then
248 write(*,*) " Allocation error in Setup Cluster MKL, a",ierr,nnz
250 endif
251 if( .not. associated(b) ) allocate(b(n), stat=ierr)
252 if (ierr /= 0) then
253 write(*,*) " Allocation error in Setup Cluster MKL, b",ierr,n
255 endif
256 if( .not. associated(x) ) allocate(x(n), stat=ierr)
257 if (ierr /= 0) then
258 write(*,*) " Allocation error in Setup Cluster MKL, x",ierr,n
260 endif
261 else !dummy
262 allocate(ia(1),ja(1),a(1),b(1),x(1))
263 end if
264
265 t2=hecmw_wtime()
266 if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
267 write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Allocate Matrix time(sec)=',t2-t1
268
269 !gather matrix components to rank 0
270
271 call hecmw_gatherv_int(spmat%IRN, spmat%NZ, ia, ncounts, dispmat, 0, hecmw_comm_get_comm())
272 call hecmw_gatherv_int(spmat%JCN, spmat%NZ, ja, ncounts, dispmat, 0, hecmw_comm_get_comm())
273 call hecmw_gatherv_real(spmat%A, spmat%NZ, a, ncounts, dispmat, 0, hecmw_comm_get_comm())
274 if( myrank == 0 ) then !cluster pardiso must be given diagonal components even if they are zero.
275 do i=1,n
276 ia(nnz+i) = i
277 ja(nnz+i) = i
278 a(nnz+i) = 0.d0
279 end do
280 nnz = nnz + n
281 endif
282 call sparse_matrix_gather_rhs(spmat, b)
283
284 t3=hecmw_wtime()
285 if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
286 write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Gather Matrix time(sec)=',t3-t2
287
288 !convert COO to CRS
289 if( myrank==0 ) then
290 call coo2csr(n, nnz, ia, ja, a)
291 if( debug>0 ) call check_csr(n, nnz, ia, ja, a)
292 endif
293
294 t4=hecmw_wtime()
295 if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
296 write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Convert Matrix Format time(sec)=',t4-t3
297
298 deallocate(dispmat,ncounts)
299 end subroutine
300
301 subroutine coo2csr(n, nnz, ia, ja, a)
302 integer(kind=kint), intent(in) :: n
303 integer(kind=kint), intent(inout) :: nnz
304 integer(kind=kint), pointer, intent(inout) :: ia(:)
305 integer(kind=kint), pointer, intent(inout) :: ja(:)
306 real(kind=kreal), pointer, intent(inout) :: a(:)
307
308 integer(kind=kint) :: i, k, idx, is, ie, nnz_new, prev
309 integer(kind=kint), allocatable :: counter(:), counter_curr(:), ia0(:), ja0(:)
310 real(kind=kreal), allocatable :: a0(:)
311
312 allocate(counter(0:n),counter_curr(n),ia0(n+1),ja0(nnz),a0(nnz))
313
314 counter(:) = 0
315 do i=1,nnz
316 idx = ia(i)
317 counter(idx) = counter(idx) + 1
318 end do
319 ia0(:)=0
320 do i=1,n
321 ia0(i+1) = ia0(i)+counter(i)
322 end do
323
324 counter_curr(:) = 0
325 do i=1,nnz
326 idx = ia(i)
327 counter_curr(idx) = counter_curr(idx) + 1
328 idx = ia0(idx)+counter_curr(idx)
329 ja0(idx) = ja(i)
330 a0(idx) = a(i)
331 end do
332
333 !$omp parallel private(i, iS, iE)
334 !$omp do
335 do i=1,n
336 is = ia0(i)+1
337 ie = ia0(i+1)
338 call sort_column_ascending_order0(ie-is+1,ja0(is:ie),a0(is:ie))
339 enddo
340 !$omp end do
341 !$omp end parallel
342
343 ia(1)=1
344 do i=1,n
345 ia(i+1) = ia(i)+counter(i)
346 end do
347
348 prev = 0
349 nnz_new = 0
350 ia(1) = 1
351 do i=1,n
352 do k=ia0(i)+1,ia0(i+1)
353 if( ja0(k) == prev ) then
354 a(nnz_new) = a(nnz_new) + a0(k)
355 else
356 nnz_new = nnz_new + 1
357 ja(nnz_new) = ja0(k)
358 a(nnz_new) = a0(k)
359 end if
360 prev = ja0(k)
361 end do
362 ia(i+1) = nnz_new + 1
363 prev = 0
364 end do
365
366 nnz = nnz_new
367
368 deallocate(counter,counter_curr,ia0,ja0,a0)
369
370 end subroutine
371
372 subroutine check_csr(n, nnz, ia, ja, a)
373 integer(kind=kint), intent(in) :: n
374 integer(kind=kint), intent(in) :: nnz
375 integer(kind=kint), pointer, intent(in) :: ia(:)
376 integer(kind=kint), pointer, intent(in) :: ja(:)
377 real(kind=kreal), pointer, intent(in) :: a(:)
378
379 integer :: i,k
380
381 if( ia(n+1)-1 /= nnz ) then
382 write(*,*) "Error in check_csr(1): ia(n+1)-1 /= nnz"
384 end if
385
386 do i=1,n
387 do k=ia(i),ia(i+1)-1
388 if( ja(k) <= 0 ) then
389 write(*,*) "Error in check_csr(2): ja(k) <= 0"
391 end if
392 if( ja(k) > n ) then
393 write(*,*) "Error in check_csr(3): ja(k) > n"
395 end if
396 if( k > ia(i) ) then
397 if( ja(k) <= ja(k-1) ) then
398 write(*,*) "Error in check_csr(4): ja(k) <= ja(k-1)"
400 end if
401 end if
402 end do
403 end do
404
405 end subroutine
406
407 subroutine sort_column_ascending_order(spMAT,myrank)
408 type (sparse_matrix), intent(inout) :: spmat
409 integer(kind=kint), intent(in) :: myrank
410
411 integer(kind=kint) :: i, is, ie
412
413 do i=2,spmat%N_loc+1
414 if(spmat%IRN(i)-spmat%IRN(i-1)>10000) &
415 & write(*,*) 'warning: n may be too large for set_mkl_set_prof0 for row ',i-1
416 enddo
417
418 !!$omp parallel private(i, iS, iE)
419 !!$omp do
420 do i=1,spmat%N_loc
421 is = spmat%IRN(i)
422 ie = spmat%IRN(i+1)-1
423 call sort_column_ascending_order0(ie-is+1,spmat%JCN(is:ie),spmat%A(is:ie))
424 enddo
425 !!$omp end do
426 !!$omp end parallel
427 end subroutine sort_column_ascending_order
428
429 subroutine sort_column_ascending_order0(n,ja,a)
430 integer(kind=kint), intent(in) :: n
431 integer(kind=kint), intent(inout) :: ja(:)
432 real(kind=kreal), intent(inout) :: a(:)
433
434 integer(kind=kint) :: i, work(2,n)
435 real(kind=kreal) :: oa(n)
436
437 do i=1,n
438 work(1,i) = ja(i)
439 work(2,i) = i
440 oa(i) = a(i)
441 enddo
442
443 call myqsort(n,work)
444
445 do i=1,n
446 ja(i) = work(1,i)
447 a(i) = oa(work(2,i))
448 enddo
449
450 end subroutine sort_column_ascending_order0
451
452 recursive subroutine myqsort(n,work)
453 integer(kind=kint), intent(in) :: n
454 integer(kind=kint), intent(inout) :: work(:,:)
455
456 integer(kind=kint) :: i, key, work1(2,n), n_low, n_high, next, minidx, maxidx
457 logical :: sorted
458
459 if(n<2) return
460 !return if work is already sorted
461 sorted = .true.
462 minidx = work(1,1)
463 maxidx = work(1,1)
464 do i=1,n-1
465 next = work(1,i+1)
466 if( work(1,i) > next ) sorted = .false.
467 if( next < minidx ) minidx = next
468 if( next > maxidx ) maxidx = next
469 enddo
470 if(sorted) return
471
472 if( n<5 .or. maxidx-minidx < 2) then
473 call myinssort(n,work)
474 return
475 endif
476
477 key = (minidx+maxidx)/2
478
479 n_low=0
480 do i=1,n
481 if(work(1,i)<key) then
482 n_low=n_low+1
483 work1(1:2,n_low) = work(1:2,i)
484 endif
485 enddo
486
487 n_high=0
488 do i=1,n
489 if(work(1,i)>=key) then
490 n_high=n_high+1
491 work1(1:2,n_low+n_high) = work(1:2,i)
492 endif
493 enddo
494
495 if(n_low>0) call myqsort(n_low,work1(1:2,1:n_low))
496 if(n_high>0) call myqsort(n_high,work1(1:2,n_low+1:n_low+n_high))
497
498 do i=1,n
499 work(1:2,i) = work1(1:2,i)
500 enddo
501
502 end subroutine myqsort
503
504 subroutine myinssort(n,work)
505 integer(kind=kint), intent(in) :: n
506 integer(kind=kint), intent(inout) :: work(:,:)
507
508 integer(kind=kint) :: i,j,tmp(2)
509
510 do i=2,n
511 do j=i,2,-1
512 if(work(1,j)<work(1,j-1)) then
513 tmp(1:2) = work(1:2,j)
514 work(1:2,j) = work(1:2,j-1)
515 work(1:2,j-1) = tmp(1:2)
516 endif
517 enddo
518 enddo
519
520 end subroutine myinssort
521
522#endif
523
524 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=kint) function hecmw_comm_get_size()
integer(kind=kint) function hecmw_comm_get_comm()
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine hecmw_abort(comm)
real(kind=kreal) function hecmw_wtime()
This module provides linear equation solver interface for Cluster Pardiso.
subroutine, public hecmw_clustermkl_wrapper(spmat, phase_start, solution, istat)
subroutine hecmw_gatherv_real(sbuf, sc, rbuf, rcs, disp, root, comm)
subroutine hecmw_allgather_int_1(sval, rbuf, comm)
subroutine hecmw_gatherv_int(sbuf, sc, rbuf, rcs, disp, root, comm)
This module provides DOF based sparse matrix data structure (CSR and COO)
integer(kind=kint), parameter, public sparse_matrix_type_coo
integer(kind=kint), parameter, public sparse_matrix_type_csr
integer(kind=kint), parameter, public sparse_matrix_symtype_sym
subroutine, public sparse_matrix_gather_rhs(spmat, rhs_all)
integer(kind=kint), parameter, public sparse_matrix_symtype_asym
integer(kind=kint), parameter, public sparse_matrix_symtype_spd
subroutine, public sparse_matrix_scatter_rhs(spmat, rhs_all)