psfun_d_sai.f90

This file depends on

Submodules

psfun_d_sai_mod
Source Code

! BSD 3-Clause License
!
! Copyright (c) 2020, Fabio Durastante
! All rights reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions are met:
!
! 1. Redistributions of source code must retain the above copyright notice, this
!    list of conditions and the following disclaimer.
!
! 2. Redistributions in binary form must reproduce the above copyright notice,
!    this list of conditions and the following disclaimer in the documentation
!    and/or other materials provided with the distribution.
!
! 3. Neither the name of the copyright holder nor the names of its
!    contributors may be used to endorse or promote products derived from
!    this software without specific prior written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

submodule (psfun_d_krylov_mod) psfun_d_sai_mod
  !! This modules implements the variant of the Shitf-And-Invert method for the
  !! computation of \(f(A)b\).

contains

  module subroutine psfun_d_saiarnoldi(fun,a,kryl,prec,tau,desc_a, &
    & initmax,initrace,inistop,y,x,eps,info,itmax,itrace,istop,iter,err,res)
    !! Shift-and-invert method based on the Arnoldi orthogonalization procedure
    !! the method builds a basis \( V_k \) for the shifted Krylov subspace
    !!
    !! \begin{equation*}
    !! \mathcal{K}_( (A + \tau I)^{-1},x ) = \{ x,(A + \tau I)^{-1}x,\ldots,(A + \tau I)^{k-1}x\},
    !! \end{equation*}
    !!
    !! and approximate \( y = f(\alpha A)x \approx \beta_1 f((H_k^{-1} - \tau I)^{-1}) e_1\),for
    !! \(\beta_1 = \|x\|_2\), \(e_1\) the first vector of the canonical
    !! base of \(\mathbb{R}^k\), and \(H_k\) the Hessemberg matrix given
    !! by the projection of \(A\) into the subspace \(\mathcal{K}_( (A + \tau I)^{-1},x )\).
    !!
    !! To  march the algorithm one needs to solve at each step a shifted linear
    !! system for which we employ the routines from [[psfun_krylov_mod]] and the
    !! preconditioners from AMG4PSBLAS.

    use psb_base_mod
    use psfun_d_serial_mod
    use psfun_krylov_mod
    use amg_prec_mod
    use psb_d_krylov_conv_mod, only: log_header, log_conv, log_end
    implicit none

    type(psfun_d_serial), intent(inout)  :: fun  !! Function object
    type(psb_dspmat_type), intent(in)    :: a    !! Distribute sparse matrix
    character(len=*), intent(in)         :: kryl !! Krylov method for the solution of inner systems
    type(amg_dprec_type), intent(inout)  :: prec !! Preconditioner for the inner method
    real(psb_dpk_), intent(in)           :: tau !! Shift parameter of the method
    type(psb_desc_type), intent(in)      :: desc_a !! Descriptor for the sparse matrix
    type(psb_d_vect_type), intent(inout) :: y !! Output vector
    type(psb_d_vect_type), intent(inout) :: x !! Input vector
    integer(psb_ipk_), intent(in)  :: initmax !! Maximum number of iteration (inner method)
    integer(psb_ipk_), intent(in)  :: initrace !! Trace for logoutput (inner method)
    integer(psb_ipk_), intent(in)  :: inistop !! Stop criterion (inner method)
    real(psb_dpk_), intent(in)           :: eps !! Requested tolerance
    integer(psb_ipk_), intent(out)       :: info  !! Output flag
    integer(psb_ipk_), optional, intent(in)  :: itmax !! Maximum number of iteration
    integer(psb_ipk_), optional, intent(in)  :: itrace !! Trace for logoutput
    integer(psb_ipk_), optional, intent(in)  :: istop !! Stop criterion
    integer(psb_ipk_), optional, intent(out) :: iter !! Number of iteration
    real(psb_dpk_), optional, intent(out) :: err !! Last estimate error
    real(psb_dpk_), optional, allocatable, intent(out) :: res(:) !! Vector of the residuals

    !Local Variables
    integer(psb_ipk_) :: litmax, itrace_, istop_
    integer(psb_lpk_) :: mglob
    integer(psb_ipk_) :: n_row, n_col, nl, naux, itx, i, k, j, lwork
    real(psb_dpk_)    :: scaling
    !Variables of possible large size:
    real(psb_dpk_), allocatable   :: aux(:)
    real(psb_dpk_), allocatable   :: h(:,:), rs(:), e(:), yk(:), hcomp(:,:)
    integer(psb_ipk_), allocatable :: work(:), ipiv(:)
    type(psb_d_vect_type), allocatable :: v(:)
    type(psb_d_vect_type)              :: w, xt
    ! Error log
    real(psb_dpk_) :: deps, errden, errnum, derr
    integer(psb_ipk_) :: err_act
    type(psb_ctxt_type) :: ctxt
    integer(psb_ipk_) :: iam, np

    character(len=20)           :: name
    character(len=60)           :: methdname

    ! debug
    ! real(psb_dpk_), allocatable    :: va(:)

    ! Information on the distributed environment
    info = psb_success_
    name = 'SAIARNOLDI'
    methdname = trim(name) // trim("+") // trim(fun%fname) // trim("+") // trim(fun%variant)
    call psb_erractionsave(err_act)
    ctxt = desc_a%get_context()
    call psb_info(ctxt, iam, np)

    mglob = desc_a%get_global_rows()
    n_row = desc_a%get_local_rows()
    n_col = desc_a%get_local_cols()

    ! Select the stop-criterion
    if (present(istop)) then
      istop_ = istop
    else
      istop_ = 1
    endif

    if ((istop_ < 1 ).or.(istop_ > 2 ) ) then
      info=psb_err_invalid_istop_
      err=info
      call psb_errpush(info,name,i_err=(/istop_/))
      goto 9999
    endif

    ! Maximum number of iterations
    if (present(itmax)) then
      litmax = itmax
    else
      litmax = 200
    endif
    nl = litmax

    naux = 4*n_col

    ! What do i print?
    if (present(itrace)) then
      itrace_ = itrace
    else
      itrace_ = 0
    end if

    ! Chek sanity of the inputs both on the local and the global
    if (.not.allocated(x%v)) then
      info = psb_err_invalid_vect_state_
      call psb_errpush(info,name)
      goto 9999
    endif
    if (.not.allocated(y%v)) then
      info = psb_err_invalid_vect_state_
      call psb_errpush(info,name)
      goto 9999
    endif
    call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
    if(info /= psb_success_) then
      info=psb_err_from_subroutine_
      call psb_errpush(info,name,a_err='psb_chkvect on X')
      goto 9999
    end if
    call psb_chkvect(mglob,lone,y%get_nrows(),lone,lone,desc_a,info)
    if(info /= psb_success_) then
      info=psb_err_from_subroutine_
      call psb_errpush(info,name,a_err='psb_chkvect on Y')
      goto 9999
    end if

    ! Allocate memory for the Krylov basis and the auxiliary quantities
    allocate(aux(naux),h(nl+1,nl+1),rs(nl+1),stat=info)

    if (info == psb_success_) call psb_geall(v,desc_a,info,n=nl+1)
    if (info == psb_success_) call psb_geall(w,desc_a,info)
    if (info == psb_success_) call psb_geall(xt,desc_a,info)
    if (info == psb_success_) call psb_geasb(v,desc_a,info,mold=x%v)
    if (info == psb_success_) call psb_geasb(w,desc_a,info,mold=x%v)
    if (info == psb_success_) call psb_geasb(xt,desc_a,info,mold=x%v)
    if (info /= psb_success_) then
      info=psb_err_from_subroutine_non_
      call psb_errpush(info,name)
      goto 9999
    end if

    ! We use the scaling in the computation of the Krylov subspace
    scaling = fun%scaling
    call fun%set("SCALING",done,info)

    ! LogVariables
    errnum = dzero
    errden = done
    deps   = eps
    ! Start the iteration
    if ((itrace_ > 0).and.(iam == 0)) call log_header(methdname)

    call psb_geaxpby(done,x,dzero,v(1),desc_a,info) ! v(1) = x
    if (info /= psb_success_) then
      info=psb_err_from_subroutine_non_
      call psb_errpush(info,name)
      goto 9999
    end if
    rs(1) = psb_genrm2(v(1),desc_a,info) ! rs(1) = ||v(1)||_2
    rs(2:) = dzero
    if (info /= psb_success_) then
      info=psb_err_from_subroutine_non_
      call psb_errpush(info,name)
      goto 9999
    end if

    call v(1)%scal(done/rs(1)) !v(1) = v(1)/||v(1)||_2
    itx = 0
    arnoldicycle: do i = 2, nl, 1
      itx = itx + 1
      ! write(*,'("Iteration ",i2)')i
      call psfun_dkrylov_vect(kryl,a,prec,v(i-1),done,tau,w,eps,desc_a,info,&
           & itmax=initmax,itrace=initrace,istop=inistop) ! w = (A+ \tau I)^{-1} v(i-1)
      ! va = w%get_vect()
      ! write(*,'("w(1) =",f8.2)')va(1)
      if (info /= psb_success_) then
        info=psb_err_from_subroutine_non_
        call psb_errpush(info,name)
        goto 9999
      end if
      do k = 1, i-1, 1
        h(k,i-1) = psb_gedot(v(k),w,desc_a,info)            !h_{k,i-1}=w^T*v(k)
        ! write(*,'("h(",i2,","i2") =",f8.2)')k,i-1,h(k,i-1)
        call psb_geaxpby(-h(k,i-1),v(k),done,w,desc_a,info) !w = w - h_{k,i-1}v(k)
      end do
      h(i,i-1) = psb_genrm2(w,desc_a,info)                  !h_{i,i-1} = ||w||_2
      ! write(*,'("h(",i2,","i2") =",f8.2)')i,i-1,h(i,i-1)
      call psb_geaxpby(done/h(i,i-1),w,dzero,v(i),desc_a,info) !v(i) = w/h_{i,i-1}

      ! A posteriori error estimate:
      ! This is the default a posteriori-error estimate, it costs the computation
      ! of the matrix function at each iteration to use the computed values
      if( istop_ == 1) then
        !
        ! build y and then compute the residual as
        ! rs(j) = \| x \|_2 h_{i,i-1} | e_{i-1}^T f((H_{i-1}^-1 - tau I)^-1) e_1 |
        !
        allocate(e(i-1), stat=info)
        if (info /= 0) then
          info=psb_err_from_subroutine_non_
          call psb_errpush(info,name)
          goto 9999
        end if
        e(:) = dzero

        allocate(yk(i-1), stat=info)
        if (info /= 0) then
          info=psb_err_from_subroutine_non_
          call psb_errpush(info,name)
          goto 9999
        end if

        ! We need to use Lapack to compute the inverse of the matrix on which
        ! we compute the matrix function
        allocate(hcomp(i-1,i-1), ipiv(i-1), work(1), stat=info)
        if (info /= 0) then
          info=psb_err_from_subroutine_non_
          call psb_errpush(info,name)
          goto 9999
        end if
        hcomp = h(1:i-1,1:i-1)
        ! We compute the inverse of hcomp, this is done in three steps:
        ! 1) Compute the hcomp = P*L*U factorization
        call dgetrf(i-1,i-1,hcomp(1:i-1,1:i-1),i-1,ipiv,info)
        if (info /= 0) then
          call psb_errpush(info,name//"dgetrf")
          goto 9999
        end if
        ! 2) Compute the workspace needed by the inversion routine
        call dgetri(i-1,hcomp(1:i-1,1:i-1),i-1,ipiv,work,-1,info)
        if (info /= 0) then
          call psb_errpush(info,name//"dgetri1")
          goto 9999
        end if
        lwork = work(1)
        deallocate(work)
        allocate(work(lwork), stat=info)
        if (info /= 0) then
          info=psb_err_from_subroutine_non_
          call psb_errpush(info,name)
          goto 9999
        end if
        ! 3) Actually compute the inverse
        call dgetri(i-1,hcomp(1:i-1,1:i-1),i-1,ipiv,work,lwork,info)
        if (info /= 0) then
          call psb_errpush(info,name//"dgetri2")
          goto 9999
        end if

        ! Now we apply the "back" Shift
        do j=1,i-1,i
          hcomp(j,j) = hcomp(j,j) - tau
        end do

        ! And we can now apply the matrix function:
        e(1) = done
        call fun%apply(hcomp(1:i-1,1:i-1),yk,e,info)
        if (info /= psb_success_) then
          info=psb_err_from_subroutine_non_
          call psb_errpush(info,name)
          goto 9999
        end if

        errnum = rs(1)*h(i,i-1)*abs( yk(i-1) );
        errden = norm2(yk)
        rs(i) = errnum/errden

        ! And deallocate auxiliary variables
        deallocate(hcomp,ipiv,work, stat=info)
        if (info /= 0) then
          info=psb_err_from_subroutine_non_
          call psb_errpush(info,name)
          goto 9999
        end if

        ! log of the error estimate
        if (itrace_ > 0) &
             & call log_conv(methdname,iam,itx,itrace_,errnum,errden,deps)

        if (allocated(e)) deallocate(e, stat=info)
        if (info /= 0) then
          info=psb_err_from_subroutine_non_
          call psb_errpush(info,name)
          goto 9999
        end if
        ! Check convergence
        if(rs(i) < deps) then
          exit arnoldicycle
        else
          if (allocated(yk)) deallocate(yk, stat=info)
          if (info /= 0) then
            info=psb_err_from_subroutine_non_
            call psb_errpush(info,name)
            goto 9999
          end if
        end if

      end if

    end do arnoldicycle
    !
    ! Assemble the final solution
    !
    if(.not.allocated(yk)) then
      ! First we look if we have arrived here having not used an a posteriori
      ! error estimate, and thus not having computed the solution in the Krylov
      ! subspace.
      allocate(e(itx-1), stat=info)
      if (info /= 0) then
        info=psb_err_from_subroutine_non_
        call psb_errpush(info,name)
        goto 9999
      end if
      e(:) = dzero

      allocate(yk(itx-1), stat=info)
      if (info /= 0) then
        info=psb_err_from_subroutine_non_
        call psb_errpush(info,name)
        goto 9999
      end if
      e(1) = done
      call fun%apply(h(1:itx-1,1:itx-1),yk,e,info)
      if (info /= psb_success_) then
        info=psb_err_from_subroutine_non_
        call psb_errpush(info,name)
        goto 9999
      end if

      if (allocated(e)) deallocate(e, stat=info)
      if (info /= 0) then
        info=psb_err_from_subroutine_non_
        call psb_errpush(info,name)
        goto 9999
      end if
    end if

    ! Final Log
    call log_end(methdname,iam,itx,itrace_,errnum,errden,deps,err=derr,iter=iter)
    if (present(err)) err = derr
    if (present(res)) then
      allocate(res(itx+1),stat=info)
      res(1:itx) = rs(1:itx)
      res(itx+1) = derr
    end if

    call  psb_geaxpby(yk(1),v(1),dzero,y,desc_a,info)
    do i=2,itx-1,1
      call  psb_geaxpby(yk(i),v(i),done,y,desc_a,info)
    end do
    call y%scal(rs(1))

    ! Set back the scaling
    call fun%set("SCALING",scaling,info)

    ! Free the memory
    if (info == psb_success_) call psb_gefree(v,desc_a,info)
    if (info == psb_success_) call psb_gefree(w,desc_a,info)
    if (info == psb_success_) call psb_gefree(xt,desc_a,info)
    if (info == psb_success_) deallocate(aux,h,rs,stat=info)
    if (info /= psb_success_) then
      info=psb_err_from_subroutine_non_
      call psb_errpush(info,name)
      goto 9999
    end if

    ! Close and return
    call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
return

  end

end submodule psfun_d_sai_mod
psfun_d_sai.f90 Source File

Contents

Submodules

Source Code

This file depends on

Contents

Submodules

Source Code

Source Code