千万自由度大规模线性方程组并行求解库PETSc的介绍
PETSc是Portable, ExtensibleToolkit for Scientific Computation的缩写,是美国能源部开发的科学计算可移植扩展工具包,其基于MPI实现在分布式内存环境下实现偏微分方程和线性方程组的求解。
对于线性方程组的求解,PETSc提供了较为全面的迭代方法求解,尤其是基于Krylov方法的各种迭代方法及各种预处理方法。在PETSc中,支持的krylov方法可通过KspSetType进行设置,具体支持的KspType如下:
typedef const char *KSPType; #define KSPRICHARDSON "richardson" #define KSPCHEBYSHEV "chebyshev" #define KSPCG "cg" #define KSPGROPPCG "groppcg" #define KSPPIPECG "pipecg" #define KSPPIPECGRR "pipecgrr" #define KSPPIPELCG "pipelcg" #define KSPPIPEPRCG "pipeprcg" #define KSPPIPECG2 "pipecg2" #define KSPCGNE "cgne" #define KSPNASH "nash" #define KSPSTCG "stcg" #define KSPGLTR "gltr" #define KSPCGNASH PETSC_DEPRECATED_MACRO("GCC warning \"KSPCGNASH macro is deprecated use KSPNASH (since version 3.11)\"") "nash" #define KSPCGSTCG PETSC_DEPRECATED_MACRO("GCC warning \"KSPCGSTCG macro is deprecated use KSPSTCG (since version 3.11)\"") "stcg" #define KSPCGGLTR PETSC_DEPRECATED_MACRO("GCC warning \"KSPCGGLTR macro is deprecated use KSPSGLTR (since version 3.11)\"") "gltr" #define KSPFCG "fcg" #define KSPPIPEFCG "pipefcg" #define KSPGMRES "gmres" #define KSPPIPEFGMRES "pipefgmres" #define KSPFGMRES "fgmres" #define KSPLGMRES "lgmres" #define KSPDGMRES "dgmres" #define KSPPGMRES "pgmres" #define KSPTCQMR "tcqmr" #define KSPBCGS "bcgs" #define KSPIBCGS "ibcgs" #define KSPQMRCGS "qmrcgs" #define KSPFBCGS "fbcgs" #define KSPFBCGSR "fbcgsr" #define KSPBCGSL "bcgsl" #define KSPPIPEBCGS "pipebcgs" #define KSPCGS "cgs" #define KSPTFQMR "tfqmr" #define KSPCR "cr" #define KSPPIPECR "pipecr" #define KSPLSQR "lsqr" #define KSPPREONLY "preonly" #define KSPNONE "none" #define KSPQCG "qcg" #define KSPBICG "bicg" #define KSPMINRES "minres" #define KSPSYMMLQ "symmlq" #define KSPLCD "lcd" #define KSPPYTHON "python" #define KSPGCR "gcr" #define KSPPIPEGCR "pipegcr" #define KSPTSIRM "tsirm" #define KSPCGLS "cgls" #define KSPFETIDP "fetidp" #define KSPHPDDM "hpddm"
在上面的方法中,有限元求解中常用的CG,Bicgstab,Gmres均有涉及。同时,当ksptype为preonly时,表明方程组不采用迭代求解,只采用预处理方法直接求解方程组,因此此时实际上就实现了线性方程组的直接法求解。
对于预处理方法,PETSc支持的方法如下:
typedef const char *PCType; #define PCNONE "none" #define PCJACOBI "jacobi" #define PCSOR "sor" #define PCLU "lu" #define PCQR "qr" #define PCSHELL "shell" #define PCAMGX "amgx" #define PCBJACOBI "bjacobi" #define PCMG "mg" #define PCEISENSTAT "eisenstat" #define PCILU "ilu" #define PCICC "icc" #define PCASM "asm" #define PCGASM "gasm" #define PCKSP "ksp" #define PCBJKOKKOS "bjkokkos" #define PCCOMPOSITE "composite" #define PCREDUNDANT "redundant" #define PCSPAI "spai" #define PCNN "nn" #define PCCHOLESKY "cholesky" #define PCPBJACOBI "pbjacobi" #define PCVPBJACOBI "vpbjacobi" #define PCMAT "mat" #define PCHYPRE "hypre" #define PCPARMS "parms" #define PCFIELDSPLIT "fieldsplit" #define PCTFS "tfs" #define PCML "ml" #define PCGALER_KIN "galerkin" #define PCEXOTIC "exotic" #define PCCP "cp" #define PCBFBT "bfbt" #define PCLSC "lsc" #define PCPYTHON "python" #define PCPFMG "pfmg" #define PCSMG "smg" #define PCSYSPFMG "syspfmg" #define PCREDISTRIBUTE "redistribute" #define PCSVD "svd" #define PCGAMG "gamg" #define PCCHOWILUVIENNACL "chowiluviennacl" #define PCROWSCALINGVIENNACL "rowscalingviennacl" #define PCSAVIENNACL "saviennacl" #define PCBDDC "bddc" #define PCKACZMARZ "kaczmarz" #define PCTELESCOPE "telescope" #define PCPATCH "patch" #define PCLMVM "lmvm" #define PCHMG "hmg" #define PCDEFLATION "deflation" #define PCHPDDM "hpddm" #define PCH2OPUS "h2opus" #define PCMPI "mpi"
在PETSC提供的上述方法的基础上,我们可以进行各种求解方法的求解测试,从而获得各种求解方法面对各种问题的具体性能。
另外一方面,在PETSC中,通过Mat和Vec定义的矩阵和向量可以依据具体insert的值自动组装成整体矩阵或者向量,这对于稀疏矩阵线性方程组的求解具有十分重要的意义。例如,在固体力学有限元求解中,如果手动组装稀疏矩阵存储的刚度矩阵,实际上需要依据单元节点连接获得节点的邻接矩阵从而确定每行的非零元素个数与位置。而如果采用PETSC,则可以直接定义整体刚度矩阵的Mat,再将各个单元的单元刚度矩阵逐个insert进整体刚度矩阵的Mat,PETSC就会自动形成最终的整体稀疏刚度矩阵。另外,在求解时对于矩阵和向量PETSC会自动进行MPI进程分割完成最终求解。
以下是一个从文件中读取矩阵和向量并求解AX=B的例子:
static char help[]="linear solve"; #include <petscmat.h> #include <petscksp.h> #undef __FUNCT__ #define __FUNCT__ "main" int main(int argc,char **args) { Mat A; Vec b,u,u_tmp,x; char Ain[PETSC_MAX_PATH_LEN],rhs[PETSC_MAX_PATH_LEN]; PetscErrorCode ierr; int m,n,nz,dummy; PetscInt i,col,row,rstart,rend,rstart1,rend1;; PetscScalar val; FILE *Afile,*bfile; PetscViewer view; PetscBool flg_A,flg_b,flg; PetscMPIInt size,rank; int shift; KSP ksp; PC pc; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = PetscOptionsGetString(PETSC_NULL,PETSC_NULL,"-Ain",Ain,PETSC_MAX_PATH_LEN,&flg_A);CHKERRQ(ierr); if (flg) shift = 0; if (flg_A){ // ierr = PetscPrintf(PETSC_COMM_WORLD,"\n Read matrix in ascii format ...\n");CHKERRQ(ierr); Afile=fopen(Ain,"r"); fscanf(Afile,"%d %d %d\n",&m,&n,&nz); //ierr = PetscPrintf(PETSC_COMM_WORLD,"m: %d, n: %d, nz: %d \n", m,n,nz);CHKERRQ(ierr); if (m != n) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ, "Number of rows, cols must be same for this example\n"); ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); PetscCall(MatSetUp(A)); ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); // printf("\nThis is process %d reading from %d to %d ...\n",rank,rstart,rend-1); for (i=0; i<nz; i++) { fscanf(Afile,"%d %d %le\n",&row,&col,(double*)&val); if (row>=rstart && row<rend) ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); fflush(stdout); fclose(Afile); } ierr = PetscOptionsGetString(PETSC_NULL,PETSC_NULL,"-rhs",rhs,PETSC_MAX_PATH_LEN,&flg_b);CHKERRQ(ierr); ierr = PetscOptionsGetString(PETSC_NULL,PETSC_NULL,"-rhs",rhs,PETSC_MAX_PATH_LEN,&flg_b);CHKERRQ(ierr); if (flg_b){ ierr = VecCreate(PETSC_COMM_WORLD,&b);CHKERRQ(ierr); ierr = VecSetSizes(b,PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(b);CHKERRQ(ierr); ierr = VecGetOwnershipRange(b,&rstart1,&rend1);CHKERRQ(ierr); // printf("\nThis is process %d reading from %d to %d ...\n",rank,rstart1,rend1-1); bfile = fopen(rhs,"r"); for (i=0; i<n; i++) { fscanf(bfile,"%d %le\n",&dummy,(double*)&val); if (dummy>=rstart1 && dummy<rend1) ierr = VecSetValues(b,1,&i,&val,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(b);CHKERRQ(ierr); ierr = VecAssemblyEnd(b);CHKERRQ(ierr); fflush(stdout); fclose(bfile); } /*ierr = PetsfcPrint(PETSC_COMM_WORLD,"\n Write matrix in binary to 'matrix.dat' ...\n");CHKERRQ(ierr); ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&view);CHKERRQ(ierr); ierr = MatView(A,view);CHKERRQ(ierr); if (flg_b){ ierr = PetscPrintf(PETSC_COMM_WORLD,"\n Write rhs in binary to 'matrix.dat' ...\n");CHKERRQ(ierr); ierr = VecView(b,view);CHKERRQ(ierr); } ierr = MatDestroy(&A);CHKERRQ(ierr); if (flg_b) {ierr = VecDestroy(&b);CHKERRQ(ierr);} ierr = PetscViewerDestroy(&view);CHKERRQ(ierr);*/ PetscCall(VecDuplicate(b, &x)); PetscLogDouble start_time,end_time,time; PetscTime(&start_time); PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); PetscCall(KSPSetOperators(ksp, A, A)); PetscCall(KSPGetPC(ksp, &pc)); PetscCall(PCSetType(pc, PCJACOBI)); PetscCall(KSPSetType(ksp,KSPCG)); PetscCall(KSPSetTolerances(ksp, 1.e-6, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT)); PetscCall(KSPSetFromOptions(ksp)); PetscCall(KSPSolve(ksp, b, x)); PetscTime(&end_time); time = end_time - start_time; PetscPrintf(PETSC_COMM_WORLD, "\n%-15s%-7.5f seconds\n", "Average time:",time ); //PetscCall( VecView(x,PETSC_VIEWER_STDOUT_WORLD)); PetscCall(KSPDestroy(&ksp)); PetscCall(VecDestroy(&x)); PetscCall(MatDestroy(&A)); ierr = PetscFinalize(); return 0; }
具体的运行命令如下:
mpiexec -n 10 ./solve -Ain mat1.txt -rhs vec1.txt
其中mat1.txt的文件格式采用COO(第一行是行数,列数和非0元素个数):
Vec1.txt的文件格式如下(第1列是行号,第二列是数值):
最终结果:
以上,就是PETSC的简单介绍和一个简单例子,感谢您的阅读!
【完】
欢迎关注公众号 有限元术