!=======================================================================
!=======================================================================
      subroutine slap (nord, bv, xv, nelm, nblks, kstride, nulm
     &,                ntrans, ia, ja, ik, av, tol, itmax, iter
     &,                err, ierr, rwork, lenw, iwork, leniw, nc_aj
     &,                nr_jl, nr_aj, nt_indx, nt_indxr1, nt_kc
     &,                nt_indxc1, nt_i, nt_type, nr_jl2, nc_iu2
     &,                ik1, ik26, ik37, ik4, ik5, ia2, ia3)
!=======================================================================

!       sparse matrix solver for the EMBM atmosphere model

!       These routines expect the array to be stored in slap column
!       format. See the format converter slap_ss2y to convert from
!       slap triad to column format.

!       based on SLAP routines modified by N. Chepurniy and M. Eby

!       =================== s l a p column format ==================
!       this routine  requires that  the matrix a  be stored in  the
!       slap column format.  in this format the non-zeros are stored
!       counting down columns (except  for the diagonal entry, which
!       must appear first in each  "column") and  are stored  in the
!       real array a.  in other words, for each column in the matrix
!       put the diagonal entry in a.  then put in the other non-zero
!       elements going down   the  column (except  the diagonal)  in
!       order.  the ia array holds the row  index for each non-zero.
!       the ja array holds the offsets into the ia, a arrays for the
!       beginning of   each    column.    that  is,    ia(ja(icol)),
!       a(ja(icol)) points to the beginning of the icol-th column in
!       ia and  a.  ia(ja(icol+1)-1),  a(ja(icol+1)-1) points to the
!       end  of   the icol-th  column.  note   that  we  always have
!       ja(n+1) = nelt+1, where  n  is the number of columns in  the
!       matrix and  nelt   is the number of non-zeros in the matrix.

!       here is an example of the  slap column  storage format for a
!       5x5 matrix (in the a and ia arrays '|'  denotes the end of a
!       column):

!           5x5 matrix      slap column format for 5x5 matrix on left.
!                              1  2  3    4  5    6  7    8    9 10 11
!       |11 12  0  0 15|   a: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
!       |21 22  0  0  0|  ia:  1  2  5 |  2  1 |  3  5 |  4 |  5  1  3
!       | 0  0 33  0 35|  ja:  1  4  6    8  9   12
!       | 0  0  0 44  0|
!       |51  0 53  0 55|

      implicit none

      integer   locrb, locib, ligw
      parameter (locrb=1, locib=11)

      integer nord, nelm, nblks, kstride, nulm, ntrans, nsave
      integer itmax, iter, ierr, lenw, iwork(leniw)
      integer leniw, lociw, locl, locnc, locju, locnr
      integer locw, locrgw, locdin, locu, locjl, lociu, locil
      integer j,locigw, jbgn, jend, icol, nl, nu
      real bv(nord), xv(nord), av(nelm), tol, err, rwork(lenw)

      integer,dimension(nulm)   :: nc_aj, nr_jl, nr_aj
      integer,dimension(ntrans) :: nt_indx, nt_indxr1, nt_kc
      integer,dimension(ntrans) :: nt_indxc1, nt_i, nt_type
      integer,dimension(nulm)   :: nr_jl2, nc_iu2
      integer,dimension(nord)   :: ik1, ik26, ik37, ik4, ik5, ia2, ia3

      integer,dimension(nelm)   :: ia, ja, ik

      ierr = 0
      err  = 0.0
      nsave = 10

      nl = nulm
      nu = nulm

      locigw = locib
      locil = locigw + 20
      locjl = locil + nord+1
      lociu = locjl + nl
      locju = lociu + nu
      locnr = locju + nord+1
      locnc = locnr + nord
      lociw = locnc + nord

      locl = locrb
      locdin = locl + nl
      locu = locdin + nord
      locrgw = locu + nu
      locw = locrgw + 1+nord*(nsave+6)+nsave*(nsave+3)

      iwork(1) = locil
      iwork(2) = locjl
      iwork(3) = lociu
      iwork(4) = locju
      iwork(5) = locl
      iwork(6) = locdin
      iwork(7) = locu
      iwork(9) = lociw
      iwork(10) = locw

!     compute the incomplete lu decomposition.

      call slap_ssilus (nord, nelm, nblks, kstride, nulm, ntrans, ia, ja
     &,                 av, nl, iwork(locil), iwork(locjl), rwork(locl)
     &,                 rwork(locdin), nu, iwork(lociu), iwork(locju)
     &,                 rwork(locu), iwork(locnr), iwork(locnc), ik1
     &,                 nc_aj, nr_jl, nr_aj, nt_indx, nt_indxr1
     &,                 nt_kc,  nt_indxc1, nt_i, nt_type)

!     perform the incomplete lu preconditioned generalized minimum
!     residual iteration algorithm.  the following sgmres
!     defaults are used maxl = kmp = nsave, jscal = 0,
!     jpre = -1, nrmax = itmax/nsave

      iwork(locigw) = nsave
      iwork(locigw+1) = nsave
      iwork(locigw+2) = 0
      iwork(locigw+3) = -1
      iwork(locigw+4) = itmax/nsave
      ligw   = 20

      call slap_sgmres (nord, bv, xv, nelm, nulm, ia, ja, ik1, ik26
     &,                 ik37, ik4, ik5, ia2, ia3, av, tol, iter, err
     &,                 ierr, rwork, rwork, rwork(locrgw), lenw-locrgw
     &,                 iwork(locigw), ligw, rwork, iwork, kstride
     &,                 nr_jl2, nc_iu2)

      if (iter .gt.itmax) ierr = 2

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_ssilus (nord, nelm, nblks, kstride, nulm, ntrans
     &,                       iav, jav, av, nl, il, jl, l, dinv, nu, iu
     &,                       ju, u, nrow, ncol, ik1, nc_aj, nr_jl
     &,                       nr_aj, nt_indx, nt_indxr1, nt_kc
     &,                       nt_indxc1, nt_i, nt_type)
!=======================================================================

      implicit none

!***purpose  Incomplete lu decomposition preconditioner slap set up.
!            Routine to generate the incomplete ldu decomposition of a
!            matrix.  the  unit lower triangular factor l is stored by
!            rows and the  unit upper triangular factor u is stored by
!            columns.  The inverse of the diagonal matrix d is stored.
!            no fill in is allowed.
! *arguments:
! nord    :in       integer
!          order of the matrix.
! nelm    :in       integer.
!          number of elements in arrays iav and av.
! nblks   :in       integer.
! kstride :in       integer.
! nulm    :in       integer.
! ntrans  :in       integer.
! iav     :in       integer iav(nelm).
! jav     :in       integer jav(nelm), but changed to slap column with
!          (n+1) entries
! av      :in       real av(nelm).
!          these arrays should hold the matrix av in the slap column
!          format.  see "description", below.
! nl      :in       integer.
!          number of non-zeros in the el array. el ?
! il      :out      integer il(n+1).
! jl      :out      integer jl(nl).
! l       :out      real     l(nl).
!          il, jl, l  contain the unit lower  triangular factor of  the
!          incomplete decomposition  of some  matrix stored  in   slap
!          row format.     the   diagonal  of ones  *is*  stored.  see
!          "description", below for more details about the slap format.
! nu      :out      integer.
!          number of non-zeros in the u array.
! iu      :out      integer iu(n+1).
! ju      :out      integer ju(nu).
! u       :out      real     u(nu).
!          iu, ju, u contain   the unit upper triangular factor of the
!          incomplete  decomposition    of some matrix  stored in slap
!          column  format.   the diagonal of ones   *is*  stored.  see
!          "description", below  for  more  details  about  the   slap
!          format.
! nrow    :work     integer nrow(n).
!          nrow(i) is the number of non-zero elements in the i-th row
!          of l.
! ncol    :work     integer ncol(n).
!          ncol(i) is the number of non-zero elements in the i-th
!         column of u.

! *description
!       il, jl, l should contain the unit  lower triangular factor of
!       the incomplete decomposition of the av matrix  stored in slap
!       row format.  iu, ju, u should contain  the unit upper factor
!       of the  incomplete decomposition of  the av matrix  stored in
!       slap column format this ilu factorization can be computed by
!       the ssilus routine.  the diagonals (which is all one's) are
!       stored.

      integer nord, nelm, nblks, kstride, nulm, ntrans, iav(nelm)
      integer jav(nelm), nl, il(nl), jl(nl), nu, iu(nu), ju(nu)
      integer nrow(nord), ncol(nord), i, icol, indx, indxr1, indxc1, kc
      real av(nelm), l(nl), dinv(nord), u(nu)

      integer,dimension(nord)   :: ik1
      integer                   :: ii, itrans, it_type
      integer,dimension(nulm)   :: nc_aj, nr_jl, nr_aj
      integer,dimension(ntrans) :: nt_indx, nt_indxr1, nt_kc
      integer,dimension(ntrans) :: nt_indxc1, nt_i, nt_type

      logical,save :: first_time = .true.

!     SEGMENT 1
      if (first_time) then
        call slap_get_iujl (iu, jl, nblks, kstride, nulm)
        call slap_gen_nrnc (nrow, ncol, il, ju, iav, jav, nord, nelm
     &,                     nblks, kstride, nulm, nc_aj, nr_jl, nr_aj)
      endif

!     SEGMENT 2
!     Copy the matrix av into the l and u structures.
      do icol=1,nord
        dinv(icol) = av(ik1(icol))     ! Main diagonal element in column icol
      enddo

      do ii = 1, nulm
        iu(ii) = iav(nc_aj(ii))
        u(ii) =  av(nc_aj(ii))
        l(nr_jl(ii)) =  av(nr_aj(ii))
      enddo

!     SEGMENT 3
!     Sort the rows of l and the columns of u.
!     and print number of Exchanges done during sort

!     SEGMENT 4
!     perform the incomplete ldu decomposition.

      if (first_time) then
        call slap_write23 (nord, ntrans, nl, il, jl, nu, iu, ju, nt_indx
     &,                    nt_indxr1, nt_kc, nt_indxc1, nt_i, nt_type)
        first_time = .false.
      endif

      do itrans=1,ntrans
        it_type = nt_type(itrans)
        indx    = nt_indx(itrans)
        i       = nt_i(itrans)

        if (it_type .eq. 2) then
          l(indx) = l(indx)/dinv(jl(indx))
        elseif (it_type .eq. 4) then
          u(indx) = u(indx)/dinv(iu(indx))
        elseif (it_type .eq. 5) then
          indxr1 = nt_indxr1(itrans)
          kc = nt_kc(itrans)
          indxc1 = nt_indxc1(itrans)
          dinv(i) = dinv(i) - l(indxr1)*dinv(kc)*u(indxc1)
        endif
      enddo

!     replace diagonal lts by their inverses.
      do i=1,nord
        dinv(i) = 1./dinv(i)
      enddo

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_sgmres (n, b, x, nelt, nulm, ia, ja, ik1, ik26
     &,                       ik37, ik4, ik5, ia2, ia3, a, tol, iter
     &,                       err, ierr, sb, sxx, rgwk, lrgw, igwk, ligw
     &,                       rwork, iwork, kstride, nr_jl2, nc_iu2)
!=======================================================================

      implicit none

      integer  n, nelt, nulm, ia(nelt), ja(nelt), iter
      integer  lrgw, ligw, kstride, igwk(ligw), iwork(*)
      real     b(n), x(n), tol, err, sb(n), sxx(n), a(nelt)
      real     rgwk(lrgw), rwork(*)
      integer  jpre, kmp, maxl, nms, maxlp1, nmsl, nrsts, nrmax
      integer  i, iflag, lr, ldl, lhes, lgmr, lq, lv, lw
      integer  lz, ierr, lzm1, lxl, jscal
      real     bnrm, rhol, sum, slap_snrm2
      integer ik1(n), ik26(n), ik37(n), ik4(n), ik5(n), ia2(n), ia3(n)
      integer,dimension (nulm) :: nr_jl2, nc_iu2

      ierr = 0
      rhol = 0.0

!   ---------------------------------------------------------------
!     load method parameters with user values or defaults.
!   --------------------------------------------------------------
      maxl = igwk(1)
      if (maxl .eq. 0) maxl = 10
      if (maxl .gt. n) maxl = n
      kmp = igwk(2)
      if (kmp .eq. 0) kmp = maxl
      if (kmp .gt. maxl) kmp = maxl
      jscal = igwk(3)
      jpre = igwk(4)
      nrmax = igwk(5)

      if (nrmax .eq. 0) nrmax = 10
!     if nrmax .eq. -1, then set nrmax = 0 to turn off restarting.
      if (nrmax .eq. -1) nrmax = 0

!     initialize counters.
      iter = 0
      nms = 0
      nrsts = 0

!   ---------------------------------------------------------------
!     form work array segment pointers.
!   --------------------------------------------------------------
      maxlp1 = maxl + 1
      lv = 1
      lr = lv + n*maxlp1
      lhes = lr + n + 1
      lq = lhes + maxl*maxlp1
      ldl = lq + 2*maxl
      lw = ldl + n
      lxl = lw + n
      lz = lxl + n

!     load igwk(6) with required minimum length of the rgwk array.
      igwk(6) = lz + n - 1

!   --------------------------------------------------------------
!     calculate scaled-preconditioned norm of rhs vector b.
!   --------------------------------------------------------------
      if (jpre .lt. 0) then
        call slap_sslui (n, nulm, b, rgwk(lr), rwork, iwork, nr_jl2
     &,                  nc_iu2)
        nms = nms + 1
      else
        call slap_scopy (n, b, 1, rgwk(lr), 1)
      endif

      if (jscal .eq. 2 .or. jscal .eq. 3) then
        sum = 0.e0
        do i=1,n
          sum = sum + (rgwk(lr-1+i)*sb(i))**2
        enddo
        bnrm = sqrt(sum)
      else
        bnrm = slap_snrm2(n,rgwk(lr),1)
      endif

!   -------------------------------------------------------------
!     calculate initial residual.
!   -------------------------------------------------------------
      call slap_ssmv (n, x, rgwk(lr), nelt, ia, ja, ik1, ik26, ik37
     &,               ik4, ik5, ia2, ia3, a, kstride)
      do i=1,n
        rgwk(lr-1+i) = b(i) - rgwk(lr-1+i)
      enddo

!   -------------------------------------------------------------
!     if performing restarting, then load the residual into the
!     correct location in the rgwk array.
!   --------------------------------------------------------------
      do while (nrsts .le. nrmax)

        if (nrsts .gt. 0) then
!         copy the curr residual to different loc in the rgwk array.
          call slap_scopy (n, rgwk(ldl), 1, rgwk(lr), 1)
        endif

!   ------------------------------------------------------------
!       use the spigmr algorithm to solve the linear system a*z = r.
!   -------------------------------------------------------------
        call slap_spigmr (n, nulm, rgwk(lr), sb, sxx, jscal, maxl
     &,                   maxlp1, kmp, nrsts, jpre, nmsl, rgwk(lz)
     &,                   rgwk(lv), rgwk(lhes), rgwk(lq), lgmr, rwork
     &,                   iwork, rgwk(lw), rgwk(ldl), rhol, nrmax
     &,                   bnrm, x, tol, nelt, ia, ja, ik1, ik26, ik37
     &,                   ik4, ik5, ia2, ia3, a, iflag, err, kstride
     &,                   nr_jl2, nc_iu2)

        iter = iter + lgmr
        nms = nms + nmsl

!       increment x by the current approximate solution z of a*z = r.
        lzm1 = lz - 1
        do i=1,n
          x(i) = x(i) + rgwk(lzm1+i)
        enddo

        if (iflag .eq. 0) then
!         the iteration has converged.
          igwk(7) = nms
          rgwk(1) = rhol
          ierr = 0
          return

        elseif (iflag .eq. 1) then
          nrsts = nrsts + 1

        elseif (iflag .eq. 2) then
!         gmres failed to reduce last residual in maxl iterations.
!         the iteration has stalled.
          igwk(7) = nms
          rgwk(1) = rhol
          ierr = 2
          return
        endif

      enddo

!     max number((nrmax+1)*maxl) of linear iterations performed.
      igwk(7) = nms
      rgwk(1) = rhol
      ierr = 1

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_spigmr (n, nulm, r0, sr, sz, jscal, maxl, maxlp1
     &,                       kmp, nrsts, jpre, nmsl, z, v, hes, q, lgmr
     &,                       rpar, ipar, wk, dl, rhol, nrmax, bnrm, x
     &,                       tol, nelt, ia, ja, ik1, ik26, ik37, ik4
     &,                       ik5, ia2, ia3, a, iflag, err, kstride
     &,                       nr_jl2, nc_iu2)
!=======================================================================

      implicit none

      integer  n, nulm, maxl, maxlp1, kmp, jpre, nmsl, lgmr, iflag
      integer  jscal, nrsts, nrmax, nelt, kstride, iq, kp1, kb
      real     rhol, bnrm, tol, t, t1, t2
      real     r0(*), sr(*), sz(*), z(*), v(n,*), hes(maxlp1,*)
      real     q(*), rpar(*), wk(*), dl(*), a(*), x(*)
      integer  ipar(*), ia(*), ja(*)
      real     slap_snrm2
      integer ik1(N), ik26(N), ik37(N), ik4(N), ik5(N), ia2(N), ia3(N)
      integer,dimension(nulm) :: nr_jl2, nc_iu2

!     local variables.

      integer i, info, ip1, i2, j, k, ll, llp1
      real r0nrm, c, dlnrm, prod, rho, s, snormw, tem, err
      integer iter, itmax

!     zero out the z array.
      do i=1,n
        z(i) = 0.0e0
      enddo

      iflag = 0
      lgmr = 0
      nmsl = 0
!     load itmax, the maximum number of iterations.
      itmax =(nrmax+1)*maxl
!   -------------------------------------------------------------------
!     the initial residual is the vector r0.
!     apply left precon. if jpre < 0 and this is not a restart.
!     apply scaling to r0 if jscal = 2 or 3.
!   -------------------------------------------------------------------

      if ((jpre .lt. 0) .and.(nrsts .eq. 0)) then
        call slap_scopy (n, r0, 1, wk, 1)
        call slap_sslui (n, nulm, wk, r0, rpar, ipar, nr_jl2, nc_iu2)
        nmsl = nmsl + 1
      endif
      if (((jscal.eq.2) .or.(jscal.eq.3)) .and.(nrsts.eq.0)) then
        do i=1,n
          v(i,1) = r0(i)*sr(i)
        enddo
      else
        do i=1,n
          v(i,1) = r0(i)
        enddo
      endif

      r0nrm = slap_snrm2 (n, v, 1)

      iter = nrsts*maxl

      if (r0nrm/(bnrm + 1.e-20) .le. tol) return

      tem = 1.0e0/(r0nrm + 1.e-20)
      call slap_sscal (n, tem, v(1,1), 1)

!     zero out the hes array.

      do j=1,maxl
        do i=1,maxlp1
          hes(i,j) = 0.0e0
        enddo
      enddo
!   -------------------------------------------------------------------
!     main loop to compute the vectors v(*,2) to v(*,maxl).
!     the running product prod is needed for the convergence test.
!   -------------------------------------------------------------------
      prod = 1.0e0

      do ll=1,maxl
        lgmr = ll
!   -------------------------------------------------------------------
!       unscale  the  current v(ll)  and store  in wk.  call routine
!       sslui    to   compute(m-inverse)*wk,   where    m   is  the
!       preconditioner matrix.  save the answer in z.   call routine
!       ssmv to compute  vnew  = a*z,  where  a is  the the system
!       matrix.  save the answer in  v(ll+1).  scale v(ll+1).   call
!       routine sorth  to  orthogonalize the    new vector vnew   =
!       v(*,ll+1).  call routine sheqr to update the factors of hes.
!   -------------------------------------------------------------------
        if ((jscal .eq. 1) .or.(jscal .eq. 3)) then
          do i=1,n
            wk(i) = v(i,ll)/sz(i)
          enddo
        else
          call slap_scopy (n, v(1,ll), 1, wk, 1)
        endif

        if (jpre .gt. 0) then
          call slap_sslui (n, nulm, wk, z, rpar, ipar, nr_jl2, nc_iu2)
          nmsl = nmsl + 1
          call slap_ssmv (n, z, v(1,ll+1), nelt, ia, ja, ik1, ik26
     &,                    ik37, ik4, ik5, ia2, ia3, a, kstride)
        else
          call slap_ssmv (n, wk, v(1,ll+1), nelt, ia, ja, ik1, ik26
     &,                    ik37, ik4, ik5, ia2, ia3, a, kstride)
        endif

        if (jpre .lt. 0) then
          call slap_scopy (n, v(1,ll+1), 1, wk, 1)
          call slap_sslui (n, nulm, wk, v(1,ll+1), rpar, ipar, nr_jl2
     &,                    nc_iu2)
          nmsl = nmsl + 1
        endif
        if ((jscal .eq. 2) .or.(jscal .eq. 3)) then
          do i=1,n
            v(i,ll+1) = v(i,ll+1)*sr(i)
          enddo
        endif
        call slap_sorth (v(1,ll+1), v, hes, n, ll, maxlp1, kmp, snormw)
        hes(ll+1,ll) = snormw
        call slap_sheqr (hes, maxlp1, ll, q, info, ll)
        if (info .eq. ll) go to 120
!   -------------------------------------------------------------------
!       update rho, the estimate of the norm of the residual r0-a*zl.
!       if kmp <  maxl, then the vectors v(*,1),...,v(*,ll+1) are not
!       necessarily orthogonal for ll > kmp.  the vector dl must then
!       be computed, and its norm used in the calculation of rho.
!   -------------------------------------------------------------------
        prod = prod*q(2*ll)
        rho = abs(prod*r0nrm)
        if ((ll.gt.kmp) .and.(kmp.lt.maxl)) then
          if (ll .eq. kmp+1) then
            call slap_scopy (n, v(1,1), 1, dl, 1)
            do i=1,kmp
              ip1 = i + 1
              i2 = i*2
              s = q(i2)
              c = q(i2-1)
              do k=1,n
                dl(k) = s*dl(k) + c*v(k,ip1)
              enddo
            enddo
          endif
          s = q(2*ll)
          c = q(2*ll-1)/snormw
          llp1 = ll + 1
          do k=1,n
            dl(k) = s*dl(k) + c*v(k,llp1)
          enddo
          dlnrm = slap_snrm2 (n, dl, 1)
          rho = rho*dlnrm
        endif
        rhol = rho
!   -------------------------------------------------------------------
!       test for convergence.  if passed, compute approximation zl.
!       if failed and ll < maxl, then continue iterating.
!   -------------------------------------------------------------------
        iter = nrsts*maxl + lgmr

        if (rhol/(bnrm + 1.e-20) .le. tol) go to 200

        if (ll .eq. maxl) go to 100
!   -------------------------------------------------------------------
!       rescale so that the norm of v(1,ll+1) is one.
!   -------------------------------------------------------------------
        tem = 1.0e0/snormw
        call slap_sscal (n, tem, v(1,ll+1), 1)
      enddo
 100  continue
      if (rho .lt. r0nrm) go to 150
 120  continue
      iflag = 2

!     load approximate solution with zero.

      do i=1,n
        z(i) = 0.e0
      enddo
      return
 150  iflag = 1

!     tolerance not met, but residual norm reduced.
      if (nrmax .gt. 0) then

!       if performing restarting (nrmax > 0)  calculate the residual
!       vector rl and  store it in the dl  array.  if the incomplete
!       version is being used (kmp < maxl) then dl has  already been
!       calculated up to a scaling factor.   use srlcal to calculate
!       the scaled residual vector.

        call slap_srlcal (n, kmp, maxl, maxl, v, q, dl, snormw, prod,
     $       r0nrm)
      endif
!   -------------------------------------------------------------------
!     compute the approximation zl to the solution.  since the
!     vector z was used as work space, and the initial guess
!     of the linear iteration is zero, z must be reset to zero.
!   -------------------------------------------------------------------
 200  continue
      ll = lgmr
      llp1 = ll + 1
      do k=1,llp1
        r0(k) = 0.0e0
      enddo
      r0(1) = r0nrm

!     minimize(r0-hes*x,r0-hes*x).  first form q*r0.

      do k=1,ll
        kp1 = k + 1
        iq = 2*(k-1) + 1
        c = q(iq)
        s = q(iq+1)
        t1 = r0(k)
        t2 = r0(kp1)
        r0(k) = c*t1 - s*t2
        r0(kp1) = s*t1 + c*t2
      enddo

!     now solve  r*x = q*r0.

      do kb=1,ll
        k = ll + 1 - kb
        r0(k) = r0(k)/hes(k,k)
        t = -r0(k)
        call slap_saxpy (k-1, t, hes(1,k), 1, r0(1), 1)
      enddo

      do k=1,n
        z(k) = 0.0e0
      enddo
      do i=1,ll
        call slap_saxpy (n, r0(i), v(1,i), 1, z, 1)
      enddo
      if ((jscal .eq. 1) .or.(jscal .eq. 3)) then
        do i=1,n
          z(i) = z(i)/sz(i)
        enddo
      endif

      if (jpre .gt. 0) then
        call slap_scopy (n, z, 1, wk, 1)
        call slap_sslui (n, nulm, wk, z, rpar, ipar, nr_jl2, nc_iu2)
        nmsl = nmsl + 1
      endif

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_sslui (nord, nulm, b, x, rwork, iwork, nr_jl2
     &,                      nc_iu2)
!=======================================================================

      implicit none

      integer nord, nulm, iwork(*)
      integer locl, locdin, locu, locju, locil, locjl, lociu
      integer,dimension (nulm) :: nr_jl2, nc_iu2

      real    b(nord), x(nord), rwork(*)

!     pull out the locations of the arrays holding the ilu factorization
      locil = iwork(1)
      locjl = iwork(2)
      lociu = iwork(3)
      locju = iwork(4)
      locl = iwork(5)
      locdin = iwork(6)
      locu = iwork(7)

!     solve the system lux = b
      call slap_sslui2 (nord, nulm, b, x, iwork(locil), iwork(locjl)
     &,                 rwork(locl), rwork(locdin), iwork(lociu)
     &,                 iwork(locju), rwork(locu), nr_jl2, nc_iu2)

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_sslui2 (nord, nulm, b, x, il, jl, l, dinv, iu, ju
     &,                       u, nr_jl2, nc_iu2)
!=======================================================================

      implicit none

!***purpose  slap back solve for ldu factorization.
!            routine  to  solve a system of the form  l*d*u x  =  b,
!            where l is a unit  lower  triangular  matrix,  d  is  a
!            diagonal matrix, and u is a unit upper triangular matrix.
! *arguments:
! nord   :in       integer
!         order of the matrix.
! nulm   :in       integer
! b      :in       real b(n).
!         right hand side.
! x      :out      real x(n).
!         solution of l*d*u x = b.
! il     :in       integer il(n+1).
! jl     :in       integer jl(nl).
!  l     :in       real     l(nl).
!         il, jl, l contain the unit  lower triangular factor of the
!         incomplete decomposition of some matrix stored in slap row
!         format.  the diagonal of ones *is* stored.  this structure
!         can   be   set up  by   the  ssilus routine.   see
!         "description", below  for more   details about   the  slap
!         format.
! dinv   :in       real dinv(n).
!         inverse of the diagonal matrix d.
! nu     :in       integer.
!         number of non-zeros in the u array.
! iu     :in       integer iu(n+1).
! ju     :in       integer ju(nu).
! u      :in       real     u(nu).
!         iu, ju, u contain the unit upper triangular factor  of the
!         incomplete decomposition  of  some  matrix stored in  slap
!         column format.   the diagonal of ones  *is* stored.   this
!         structure can be set up  by the ssilus routine.  see
!         "description", below   for  more   details about  the slap
!         format.

! *description:
!       this routine is supplied with  the slap package as a routine
!       to  perform  the  sslui operation  in   the  sir and   sbcg
!       iteration routines for  the  drivers ssilur and sslubc.   it
!       must  be called  via   the  slap  sslui  calling   sequence
!       convention interface routine sslui.
!         **** this routine itself does not conform to the ****
!               **** slap sslui calling convention ****

!       il, jl, l should contain the unit lower triangular factor of
!       the incomplete decomposition of the a matrix  stored in slap
!       row format.  iu, ju, u should contain  the unit upper factor
!       of the  incomplete decomposition of  the a matrix  stored in
!       slap column format this ilu factorization can be computed by
!       the ssilus routine.  the diagonals (which is all one's) are
!       stored.

      integer nord, nulm, il(*), jl(*), iu(*), ju(*)
      integer i, ii
      integer,dimension (nulm) :: nr_jl2, nc_iu2

      real    b(nord), x(nord), l(*), dinv(nord), u(*)

      LOGICAL,save :: first_time  = .true.

!     Solve  l*y = b,  storing result in x, l stored by rows.

      do i=1,nord
        x(i) = b(i)
      enddo

      if (first_time) then
        call slap_calc_nr_jl2 (nord, nulm, il, ju, nr_jl2, nc_iu2)
        first_time = .false.
      endif

!     do NOT USE:   !CDIR NODEP   data dependency exists
      do ii=1, nulm
        x(nr_jl2(ii)) = x(nr_jl2(ii)) -l(ii)*x(jl(ii))
      enddo

!     Solve  d*z = y,  storing result in x.
      do i=1,nord
        x(i) = x(i)*dinv(i)
      enddo

!     do NOT USE:   !CDIR NODEP   data dependency exists
      do ii=nulm,1,-1
        x(iu(ii)) = x(iu(ii)) - u(ii)*x(nc_iu2(ii))
      enddo

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_sheqr (a, lda, n, q, info, ijob)
!=======================================================================

      implicit none

      integer lda, n, info, ijob
      real a(lda,*), q(*)

!     local variables.

      integer i, iq, j, k, km1, kp1, nm1
      real c, s, t, t1, t2

      if (ijob .le. 1) then
!   -------------------------------------------------------------------
!       a new factorization is desired.
!   -------------------------------------------------------------------
!       qr decomposition without pivoting.
        info = 0
        do k=1,n
          km1 = k - 1
          kp1 = k + 1

!         compute k-th column of r.
!         first, multiply the k-th column of a by the previous
!         k-1 givens rotations.
          if (km1 .ge. 1) then
            do j=1,km1
              i = 2*(j-1) + 1
              t1 = a(j,k)
              t2 = a(j+1,k)
              c = q(i)
              s = q(i+1)
              a(j,k) = c*t1 - s*t2
              a(j+1,k) = s*t1 + c*t2
            enddo
          endif

!         compute givens components c and s.
          iq = 2*km1 + 1
          t1 = a(k,k)
          t2 = a(kp1,k)
          if (t2 .eq. 0.0e0) then
            c = 1.0e0
            s = 0.0e0
          elseif (abs(t2).ge.abs(t1)) then
            t = t1/t2
            s = -1.0e0/sqrt(1.0e0+t*t)
            c = -s*t
          else
            t = t2/t1
            c = 1.0e0/sqrt(1.0e0+t*t)
            s = -c*t
          endif
          q(iq) = c
          q(iq+1) = s
          a(k,k) = c*t1 - s*t2
          if (a(k,k) .eq. 0.0e0) info = k
        enddo

      else
!   -------------------------------------------------------------------
!       the old factorization of a will be updated.  a row and a
!       column has been added to the matrix a.  n by n-1 is now
!       the old size of the matrix.
!   -------------------------------------------------------------------
        nm1 = n - 1
!   -------------------------------------------------------------------
!       multiply the new column by the n previous givens rotations.
!   -------------------------------------------------------------------
        do k=1,nm1
          i = 2*(k-1) + 1
          t1 = a(k,n)
          t2 = a(k+1,n)
          c = q(i)
          s = q(i+1)
          a(k,n) = c*t1 - s*t2
          a(k+1,n) = s*t1 + c*t2
        enddo
!   -------------------------------------------------------------------
!     complete update of decomposition by forming last givens
!     rotation, and multiplying it times the column
!     vector(a(n,n),a(np1,n)).
!   -------------------------------------------------------------------
        info = 0
        t1 = a(n,n)
        t2 = a(n+1,n)
        if (t2 .eq. 0.0e0) then
          c = 1.0e0
          s = 0.0e0
        elseif (abs(t2).ge.abs(t1)) then
          t = t1/t2
          s = -1.0e0/sqrt(1.0e0+t*t)
        c = -s*t
        else
          t = t2/t1
          c = 1.0e0/sqrt(1.0e0+t*t)
          s = -c*t
        endif
        iq = 2*n - 1
        q(iq) = c
        q(iq+1) = s
        a(n,n) = c*t1 - s*t2
        if (a(n,n) .eq. 0.0e0) info = n

      endif

      return
      end

!=======================================================================
!=======================================================================
      real function slap_snrm2 (n, sxx, incx)
!=======================================================================

      implicit none

      integer next, i, j, n, nn, incx
      real sxx(*),  cutlo, cuthi, hitest, sum, xmax, zero, one
      data zero, one /0.0e0, 1.0e0/

!     euclidean norm of the n-vector stored in sxx() with storage
!     increment incx .
!     if    n .le. 0 return with result = 0.
!     if n .ge. 1 then incx must be .ge. 1

!     four phase method     using two built-in constants that are
!     hopefully applicable to all machines.
!         cutlo = maximum of  sqrt(u/eps)  over all known machines.
!         cuthi = minimum of  sqrt(v)      over all known machines.
!     where
!         eps = smallest no. such that eps + 1. .gt. 1.
!         u   = smallest positive no.   (underflow limit)
!         v   = largest  no.            (overflow  limit)

!     brief outline of algorithm..

!     phase 1    scans zero components.
!     move to phase 2 when a component is nonzero and .le. cutlo
!     move to phase 3 when a component is .gt. cutlo
!     move to phase 4 when a component is .ge. cuthi/m
!     where m = n for x() real and m = 2*n for complex.

      data cutlo, cuthi / 4.441e-16,  1.304e19 /

      if (n .eq. 0) then
        slap_snrm2  = zero
        return
      endif

      next = 30
      sum = zero
      nn = n * incx
!     begin main loop
      i = 1
   20 if (next .eq. 30) goto 30
      if (next .eq. 50) goto 50
      if (next .eq. 70) goto 70
      if (next .eq. 110) goto 110

   30 if (abs(sxx(i)) .gt. cutlo) go to 85
      next = 50
      xmax = zero

!     phase 1.  sum is zero

   50 if (sxx(i) .eq. zero) go to 200
      if (abs(sxx(i)) .gt. cutlo) go to 85

!     prepare for phase 2.
      next = 70
      go to 105

!     prepare for phase 4.

  100 i = j
      next = 110
      sum = (sum / sxx(i)) / sxx(i)
  105 xmax = abs(sxx(i))
      go to 115

!     phase 2.  sum is small. scale to avoid destructive underflow.

   70 if (abs(sxx(i)) .gt. cutlo) go to 75

!     common code for phases 2 and 4. in phase 4 sum is large.
!     scale to avoid overflow.

  110 if (abs(sxx(i)) .le. xmax) go to 115
        sum = one + sum * (xmax / sxx(i))**2
        xmax = abs(sxx(i))
        go to 200

  115 sum = sum + (sxx(i)/xmax)**2
      go to 200

!     prepare for phase 3.

   75 sum = (sum * xmax) * xmax

!     for real or d.p. set hitest = cuthi/n
!     for complex      set hitest = cuthi/(2*n)

   85 hitest = cuthi/float(n)

!     phase 3.  sum is mid-range.  no scaling.

      do j=i,nn,incx
        if (abs(sxx(j)) .ge. hitest) go to 100
        sum = sum + sxx(j)**2
      enddo
      slap_snrm2 = sqrt(sum)
      go to 300

  200 continue
      i = i + incx
      if (i .le. nn) go to 20

!     end of main loop.

!     compute square root and adjust for scaling.

      slap_snrm2 = xmax * sqrt(sum)
  300 continue

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_sorth (vnew, v, hes, n, ll, ldhes, kmp, snormw)
!=======================================================================

      implicit none

      integer n, ll, ldhes, kmp
      real vnew, v, hes, snormw
      dimension vnew(*), v(n,*), hes(ldhes,*)
      real slap_snrm2, slap_sdot

!     internal variables.

      integer i, i0
      real arg, sumdsq, tem, vnrm

!     get norm of unaltered vnew for later use.
      vnrm = slap_snrm2(n, vnew, 1)
!   -------------------------------------------------------------------
!     perform the modified gram-schmidt procedure on vnew =a*v(ll).
!     scaled inner products give new column of hes.
!     projections of earlier vectors are subtracted from vnew.
!   -------------------------------------------------------------------
      i0 = max0(1,ll-kmp+1)
      do i=i0,ll
        hes(i,ll) = slap_sdot (n, v(1,i), 1, vnew, 1)
        tem = -hes(i,ll)
        call slap_saxpy (n, tem, v(1,i), 1, vnew, 1)
      enddo
!   -------------------------------------------------------------------
!     compute snormw = norm of vnew.  if vnew is small compared
!     to its input value (in norm), then reorthogonalize vnew to
!     v(*,1) through v(*,ll).  correct if relative correction
!     exceeds 1000*(unit roundoff).  finally, correct snormw using
!     the dot products involved.
!   -------------------------------------------------------------------
      snormw = slap_snrm2 (n, vnew, 1)
      if (vnrm + 0.001e0*snormw .ne. vnrm) return
      sumdsq = 0.0e0
      do i=i0,ll
        tem = -slap_sdot (n, v(1,i), 1, vnew, 1)
        if (hes(i,ll) + 0.001e0*tem .eq. hes(i,ll)) go to 30
        hes(i,ll) = hes(i,ll) - tem
        call slap_saxpy (n, tem, v(1,i), 1, vnew, 1)
        sumdsq = sumdsq + tem**2
      enddo
 30   if (sumdsq .eq. 0.0e0) return
      arg = amax1 (0.0e0,snormw**2 - sumdsq)
      snormw = sqrt(arg)

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_srlcal (n, kmp, ll, maxl, v, q, rl, snormw, prod
     &,                       r0nrm)
!=======================================================================

      implicit none

      integer n, kmp, ll, maxl
      real v, q, rl, snormw
      dimension v(n,*), q(*), rl(n)

!     internal variables.
      integer i, ip1, i2, k, llp1, llm1
      real    c, s, tem, prod, r0nrm

      if (kmp .eq. maxl) then
!       calculate rl.  start by copying v(*,1) into rl.
        call slap_scopy (n, v(1,1), 1, rl, 1)
        llm1 = ll - 1
        do i=1,llm1
          ip1 = i + 1
          i2 = i*2
          s = q(i2)
          c = q(i2-1)
          do k=1,n
            rl(k) = s*rl(k) + c*v(k,ip1)
          enddo
        enddo
        s = q(2*ll)
        c = q(2*ll-1)/snormw
        llp1 = ll + 1
        do k = 1,n
          rl(k) = s*rl(k) + c*v(k,llp1)
        enddo
      endif

!     when kmp < maxl, rl vector already partially calculated.
!     scale rl by r0nrm*prod to obtain the residual rl.
      tem = r0nrm*prod
      call slap_sscal (n, tem, rl, 1)

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_sscal (n, sa, sxx, incx)
!=======================================================================

      implicit none

!     scales a vector by a constant.
!     uses unrolled loops for increment equal to 1.
!     based on code by j. dongarra

      real sa, sxx(*)
      integer i, ix, incx, m, mp1, n

      if (n .le. 0) return

      if (incx .ne. 1) then
!       code for increment not equal to 1
        ix = 1
        if (incx .lt. 0) ix = (-n+1)*incx + 1
        do i=1,n
          sxx(ix) = sa*sxx(ix)
          ix = ix + incx
        enddo

      else
!       code for increment equal to 1
!       clean-up loop
        m = mod(n,5)
        if (m .ne. 0) then
          do i=1,m
            sxx(i) = sa*sxx(i)
          enddo
          if (n .lt. 5) return
        endif
        mp1 = m + 1
        do i=mp1,n,5
          sxx(i) = sa*sxx(i)
          sxx(i + 1) = sa*sxx(i + 1)
          sxx(i + 2) = sa*sxx(i + 2)
          sxx(i + 3) = sa*sxx(i + 3)
          sxx(i + 4) = sa*sxx(i + 4)
        enddo

      endif

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_get_iujl (iu, jl, nblks, kstride, nulm)
!=======================================================================

      implicit none

      integer :: ii, iii, jj, nblks, kstride, nulm
      integer,dimension(nulm) :: iu, jl

!  Assume the structure of the matrix to be BLOCK TRIDIAGONAL, i.e.
!  there are three diagonals (main, upper and lower) of block matrices.

!  Main block diagonal consists of nblks block matrices.
!  The block matrices are of order kstride.
!  Upper block diagonal consists of nblks-1 block matrices.
!  Lower block diagonal consists of nblks-1 block matrices.

!  This subroutine attempts to construct certain patterns used in
!  the manipulation of the elements within the BLOCK TRIDIAGONAL matrix.

!     First block matrix on Main Block Diagonal:

      do ii=1,kstride-2
        jl(ii) = ii
      enddo

      jl(kstride-1) = 1
      jl(kstride)   = kstride-1

      ii = kstride

!     Lower Block Diagonals

!     First row of block matrix

      do jj=2,nblks
        ii = ii + 1
        iii = 1
        jl(ii) = iii + (jj-2)*kstride

!        Intermediate rows of block matrix

        do iii=2,kstride-1
          ii = ii + 1
          jl(ii) = iii + (jj-2)*kstride
          ii = ii + 1
          jl(ii) = iii + (jj-1)*kstride - 1
        enddo

!       Last row of block matrix

        iii=kstride
        ii = ii + 1
        jl(ii) = iii + (jj-2)*kstride
        ii = ii + 1
        jl(ii) = iii + (jj-2)*kstride + 1
        ii = ii + 1
        jl(ii) = iii + (jj-1)*kstride - 1
      enddo

      iu = jl

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_gen_nrnc (nrow, ncol, il, ju, ia, ja, nord, nelm
     &,                         nblks, kstride, nulm, nc_aj, nr_jl
     &,                         nr_aj)
!=======================================================================

      implicit none

      integer :: ii, iii, jj, irow, icol, iu_kount, jl_kount, jbgn, jend
      integer :: nord, nelm, nblks, kstride, nulm

      integer,dimension(nulm) :: nrow, ncol
      integer,dimension(nulm) :: nc_aj, nr_jl, nr_aj
      integer,dimension(nulm) :: il,ju
      integer,dimension(nelm) :: ia,ja

!     nrow     OUTPUT
!     ncol     OUTPUT
!     il       OUTPUT
!     ju       OUTPUT

!     ia       INPUT
!     ja       INPUT

!     nc_aj    OUTPUT
!     nr_jl    OUTPUT
!     nr_aj    OUTPUT

!  Assume the structure of the matrix to be BLOCK TRIDIAGONAL, i.e.
!  there are three diagonals (main, upper and lower) of block matrices.

!  Main block diagonal consists of nblks block matrices.
!  The block matrices are of order kstride
!  Upper block diagonal consists of nblks-1 block matrices.
!  Lower block diagonal consists of nblks-1 block matrices.

!  This subroutine attempts to construct certain patterns used in
!  the manipulation of the elements within the BLOCK TRIDIAGONAL matrix.

!  Count number of elements in each row (column) of the lower (upper) triangle.
!  Move along the diagonal and count the non-zero elements to the LEFT (ABOVE)
!  of the diagonal element.

!  First block matrix on Main Block Diagonal:

      ii=1
      nrow(ii) = 0
      ncol(ii) = 0

      do ii=2,kstride-1
        nrow(ii) = 1
        ncol(ii) = 1
      enddo

      nrow(kstride) = 2
      ncol(kstride) = 2

      ii = kstride

!  Other Blocks on Main Block Diagonal

!     First row of block matrix

      do jj=2,nblks
        ii = 1
        iii = ii + (jj-1)*kstride
        nrow(iii) = 1
        ncol(iii) = 1
      enddo

!  Intermediate rows of block matrix

      do jj=2,nblks
        do ii=2,kstride-1
          iii = ii + (jj-1)*kstride
          nrow(iii) = 2
          ncol(iii) = 2
        enddo
      enddo

!     Last row of block matrix

      do jj=2,nblks
        ii=kstride
        iii = ii + (jj-1)*kstride
        nrow(iii) = 3
        ncol(iii) = 3
      enddo

!  Second usage of nrow,ncol

      ju(1) = 1
      il(1) = 1
      do icol=1,nord
        il(icol+1) = il(icol) + nrow(icol)
        ju(icol+1) = ju(icol) + ncol(icol)
        nrow(icol) = il(icol)
        ncol(icol) = ju(icol)
      enddo

      iu_kount = 0
      jl_kount = 0

      do icol=1,nord
        jbgn = ja(icol)+1
        jend = ja(icol+1)-1
        if (jbgn.le.jend) then
          do jj=jbgn, jend
            irow = ia(jj)
            if (irow.lt.icol) then
!             part of the upper triangle.
              iu_kount = iu_kount + 1
              nc_aj(iu_kount) = jj
              ncol(icol) = ncol(icol) + 1
            else
!             part of the lower triangle (stored by row).
              jl_kount = jl_kount + 1
              nr_jl(jl_kount) = nrow(irow)
              nr_aj(jl_kount) = jj
              nrow(irow) = nrow(irow) + 1
            endif
          enddo
        endif

      enddo

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_write23 (nord, ntrans, nl, il, jl, nu, iu, ju
     &,                        nt_indx, nt_indxr1, nt_kc, nt_indxc1
     &,                        nt_i, nt_type)
!=======================================================================

      implicit none

      integer nord, ntrans, nl, nu, il(nl), jl(nl), iu(nu), ju(nu)
      integer itrans, i, indx, indx1, indx2, indxr1, indxr2, indxc2
      integer indxc1, kc, kr
      integer,dimension(ntrans) :: nt_indx, nt_indxr1, nt_kc, nt_indxc1
      integer,dimension(ntrans) :: nt_i, nt_type

! SEGMENT 4

!     Perform the incomplete ldu decomposition.
      itrans = 0

      do 300 i=2,nord

!       i-th row of l
        indx1 = il(i)
        indx2 = il(i+1) - 1
        do indx=indx1,indx2
          if (indx .eq. indx1) go to 180
          indxr1 = indx1
          indxr2 = indx - 1
          indxc1 = ju(jl(indx))
          indxc2 = ju(jl(indx)+1) - 1
          if (indxc1 .gt. indxc2) go to 180
 160      kr = jl(indxr1)
 170      kc = iu(indxc1)
          if (kr .gt. kc) then
            indxc1 = indxc1 + 1
            if (indxc1 .le. indxc2) go to 170
          elseif (kr .lt. kc) then
            indxr1 = indxr1 + 1
            if (indxr1 .le. indxr2) go to 160
          elseif (kr .eq. kc) then
            itrans = itrans + 1
            nt_type(itrans)   = 1
            nt_indx(itrans)   = indx
            nt_indxr1(itrans) = indxr1
            nt_kc(itrans)     = kc
            nt_indxc1(itrans) = indxc1
            nt_i(itrans)      = i
            indxr1 = indxr1 + 1
            indxc1 = indxc1 + 1
            if (indxr1.le.indxr2 .and. indxc1.le.indxc2) go to 160
          endif
 180      continue
          itrans = itrans + 1
          nt_type(itrans)   = 2
          nt_indx(itrans)   = indx
          nt_i(itrans)      = i
        enddo

!       ith column of u
 200    indx1 = ju(i)
        indx2 = ju(i+1) - 1
        do indx=indx1,indx2
          if (indx .eq. indx1) go to 240
          indxc1 = indx1
          indxc2 = indx - 1
          indxr1 = il(iu(indx))
          indxr2 = il(iu(indx)+1) - 1
          if (indxr1 .gt. indxr2) go to 240
 210      kr = jl(indxr1)
 220      kc = iu(indxc1)
          if (kr .gt. kc) then
            indxc1 = indxc1 + 1
            if (indxc1 .le. indxc2) go to 220
          elseif (kr .lt. kc) then
            indxr1 = indxr1 + 1
            if (indxr1 .le. indxr2) go to 210
          elseif (kr .eq. kc) then
            itrans = itrans + 1
!  Set up:  nt_indx,nt_indxr1,nt_kc,nt_indxc1,nt_i
            nt_type(itrans)   = 3
            nt_indx(itrans)   = indx
            nt_indxr1(itrans) = indxr1
            nt_kc(itrans)     = kc
            nt_indxc1(itrans) = indxc1
            nt_i(itrans)      = i
            indxr1 = indxr1 + 1
            indxc1 = indxc1 + 1
            if (indxr1.le.indxr2 .and. indxc1.le.indxc2) go to 210
          endif
 240      continue
          itrans = itrans + 1
          nt_type(itrans)   = 4
          nt_indx(itrans)   = indx
          nt_i(itrans)      = i
        enddo

!       ith diagonal element
        indxr1 = il(i)
        indxr2 = il(i+1) - 1
        if (indxr1 .gt. indxr2) go to 300
        indxc1 = ju(i)
        indxc2 = ju(i+1) - 1
        if (indxc1 .gt. indxc2) go to 300
 270    kr = jl(indxr1)
 280    kc = iu(indxc1)
        itrans = itrans + 1
        if (kr .gt. kc) then
          indxc1 = indxc1 + 1
          if (indxc1 .le. indxc2) go to 280
        elseif (kr .lt. kc) then
          indxr1 = indxr1 + 1
          if (indxr1 .le. indxr2) go to 270
        elseif (kr .eq. kc) then
          nt_type(itrans)   = 5
          nt_indx(itrans)   = indx
          nt_indxr1(itrans) = indxr1
          nt_kc(itrans)     = kc
          nt_indxc1(itrans) = indxc1
          nt_i(itrans)      = i
          indxr1 = indxr1 + 1
          indxc1 = indxc1 + 1
          if (indxr1 .le. indxr2 .and. indxc1 .le. indxc2) go to 270
        endif

        indxr1 = 0
        kc     = 0
        indxc1 = 0

 300  continue

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_calc_nr_jl2 (nord, nulm, il, ju, nr_jl2, nc_iu2)
!=======================================================================

      implicit none

      integer,dimension(*)      :: il, ju
      integer nord, nulm, icol, irow, j, jbgn, jend
      integer,dimension(nulm)   :: nr_jl2, nc_iu2

      do irow=2,nord
        jbgn = il(irow)
        jend = il(irow+1)-1
        do j=jbgn,jend
          nr_jl2(j) = irow
        enddo
      enddo

      do icol=nord, 2, -1
        jbgn = ju(icol)
        jend = ju(icol+1)-1
        do j=jbgn, jend
          nc_iu2(j) = icol
        enddo
      enddo

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_ssmv (n, x, y, nelt, ia, ja, ik1, ik26, ik37, ik4
     &,                     ik5, ia2, ia3, a, istride)
!=======================================================================

      implicit none

!***purpose  slap column format sparse matrix vector product.
!            routine to calculate the sparse matrix vector product:
!            y = a*x.
! *arguments:
! n      :in       integer.
!         order of the matrix.
! x      :in       real x(n).
!         the vector that should be multiplied by the matrix.
! y      :out      real y(n).
!         the product of the matrix and the vector.
! nelt   :in       integer.
!         number of non-zeros stored in a.
! ia     :in       integer ia(nelt).
! ja     :in       integer ja(n+1).
! a      :in       integer a(nelt).
!         these arrays should hold the matrix a in the slap column
!         format.  see "description", below.

! *cautions:
!     this   routine   assumes  that  the matrix a is stored in slap
!     column format.  it does not check  for  this (for  speed)  and
!     evil, ugly, ornery and nasty things  will happen if the matrix
!     data  structure  is,  in fact, not slap column.  beware of the
!     wrong data structure

      integer n, nelt, ia(nelt), ja(nelt), nn, nn1
      integer ik1(n), ik26(n), ik37(n), ik4(n), ik5(n), ia2(n), ia3(n)
      integer i, j, jbgn, jend, irow, istride, icol, ibgn, iend
      real    a(nelt), x(n), y(n)

#if defined uvic_embm_slap_scalar
      y(:) = 0.0
      do icol=1,n
        ibgn = ja(icol)
        iend = ja(icol+1)-1
        do i=ibgn,iend
          y(ia(i)) = y(ia(i)) + a(i)*x(icol)
        enddo
      enddo
#else
      nn = n
      nn1 = nn - istride

!CDIR NODEP
      do i=1,istride
        y(i) = a(ik1(i))*x(i) + a(ik5(i))*x(i+istride)
      enddo

!CDIR NODEP
      do i=1+istride,nn1
        y(i) = a(ik1(i))*x(i) + a(ik5(i))*x(i+istride)
     &       + a(ik4(i-istride))*x(i-istride)
      enddo

!CDIR NODEP
      do i=nn1+1,nn
        y(i) = a(ik1(i))*x(i) + a(ik4(i-istride))*x(i-istride)
      enddo

!CDIR NODEP
      do i=1,nn
        y(ia2(i)) = y(ia2(i)) + a(ik26(i))*x(i)
        y(ia3(i)) = y(ia3(i)) + a(ik37(i))*x(i)
      enddo
#endif

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_saxpy (n, sa, sxx, incx, sy, incy)
!=======================================================================

      implicit none

!     constant times a vector plus a vector.
!     uses unrolled loop for increments equal to one.
!     based on code by j. dongarra

      real sxx(*), sy(*), sa
      integer i, incx, incy, ix, iy, m, mp1, n

      if (n .le. 0) return
      if (sa .eq. 0.0) return

      if (incx .ne. 1 .or. incy .ne. 1) then
!       code for unequal increments or equal increments not equal to 1
        ix = 1
        iy = 1
        if (incx .lt. 0) ix = (-n+1)*incx + 1
        if (incy .lt. 0) iy = (-n+1)*incy + 1
        do i=1,n
          sy(iy) = sy(iy) + sa*sxx(ix)
          ix = ix + incx
          iy = iy + incy
        enddo

      else
!       code for both increments equal to 1
!       clean-up loop
        m = mod(n,4)
        if (m .ne. 0) then
          do i=1,m
            sy(i) = sy(i) + sa*sxx(i)
          enddo
          if (n .lt. 4) return
        endif
        mp1 = m + 1
        do i=mp1,n,4
          sy(i) = sy(i) + sa*sxx(i)
          sy(i + 1) = sy(i + 1) + sa*sxx(i + 1)
          sy(i + 2) = sy(i + 2) + sa*sxx(i + 2)
          sy(i + 3) = sy(i + 3) + sa*sxx(i + 3)
        enddo

      endif

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_scopy (n, sxx, incx, sy, incy)
!=======================================================================

      implicit none

!     copies a vector, x, to a vector, y.
!     uses unrolled loops for increments equal to 1.
!     based on code by j. dongarra

      real sxx(*), sy(*)
      integer i, incx, incy, ix, iy, m, mp1, n

      if (n .le. 0) return

      if (incx .ne. 1 .or. incy .ne. 1) then
!       code for unequal increments or equal increment not equal to 1
        ix = 1
        iy = 1
        if (incx .lt. 0) ix = (-n+1)*incx + 1
        if (incy .lt. 0) iy = (-n+1)*incy + 1
        do i=1,n
          sy(iy) = sxx(ix)
          ix = ix + incx
          iy = iy + incy
        enddo

      else
!       code for both increments equal to 1
!       clean-up loop
        m = mod(n,7)
        if (m .ne. 0) then
          do i = 1,m
            sy(i) = sxx(i)
          enddo
          if (n .lt. 7) return
        endif
        mp1 = m + 1
        do i=mp1,n,7
          sy(i) = sxx(i)
          sy(i + 1) = sxx(i + 1)
          sy(i + 2) = sxx(i + 2)
          sy(i + 3) = sxx(i + 3)
          sy(i + 4) = sxx(i + 4)
          sy(i + 5) = sxx(i + 5)
          sy(i + 6) = sxx(i + 6)
        enddo

      endif

      return
      end

!=======================================================================
!=======================================================================
      real function slap_sdot (n, sxx, incx, sy, incy)
!=======================================================================

      implicit none

!     forms the dot product of two vectors.
!     uses unrolled loops for increments equal to one.
!     based on code by j. dongarra

      real sxx(*), sy(*), stemp
      integer i, incx, incy, ix, iy, m, mp1, n

      stemp = 0.0e0
      slap_sdot = 0.0e0
      if (n .le. 0) return

      if (incx .ne. 1 .or. incy .ne. 1) then
!        code for unequal increments or equal increments not equal to 1
        ix = 1
        iy = 1
        if (incx .lt. 0) ix = (-n+1)*incx + 1
        if (incy .lt. 0) iy = (-n+1)*incy + 1
        do i=1,n
          stemp = stemp + sxx(ix)*sy(iy)
          ix = ix + incx
          iy = iy + incy
        enddo

      else
!       code for both increments equal to 1
!       clean-up loop
        m = mod(n,5)
        if (m .ne. 0) then
          do i=1,m
            stemp = stemp + sxx(i)*sy(i)
          enddo
          if (n .lt. 5) then
            slap_sdot = stemp
            return
          endif
        endif
        mp1 = m + 1
        do i=mp1,n,5
          stemp = stemp + sxx(i)*sy(i) + sxx(i + 1)*sy(i + 1)
     &          + sxx(i + 2)*sy(i + 2) + sxx(i + 3)*sy(i + 3)
     &          + sxx(i+4)*sy(i+4)
        enddo

      endif
      slap_sdot = stemp

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_ss2y(n, nelt, ia, ja, ik,  a)
!=======================================================================

      implicit none

!***purpose  slap triad to slap column format converter.
!            routine to convert from the slap triad to slap column
!            format.
! *arguments:
! n      :in       integer
!         order of the matrix.
! nelt   :in       integer.
!         number of non-zeros stored in a.
! ia     :inout    integer ia(nelt).
! ja     :inout    integer ja(nelt).
! a      :inout    real a(nelt).
!         these arrays should hold the matrix a in either the slap
!         triad format or the slap column format.  see "long
!         description", below.  if the slap triad format is used
!         this format is translated to the slap column format by
!         this routine.

!       the sparse linear algebra package (slap) utilizes two matrix
!       data structures: 1) the  slap triad  format or  2)  the slap
!       column format.  the user can hand this routine either of the
!       of these data structures.  if the slap triad format is give
!       as input then this routine transforms it into slap column
!       format.  the way this routine tells which format is given as
!       input is to look at ja(n+1).  if ja(n+1) = nelt+1 then we
!       have the slap column format.  if that equality does not hold
!       then it is assumed that the ia, ja, a arrays contain the
!       slap triad format.

      integer n, nelt, ia(nelt), ja(nelt), ik(nelt)
      integer i, itemp, ibgn, iend, icol, j
      real    a(nelt), temp
      integer :: ktemp

!     check to see if the (ia,ja,a) arrays are in slap column
!     format.  if it's not then transform from slap triad.
      if (ja(n+1).eq.nelt+1) return

!     sort into ascending order by column (on the ja array).
!     this will line up the columns.

      call slap_qs2i1r (ja, ia, ik, a, nelt, 1)

!     loop over each column to see where the column indicies change
!     in the column index array ja.  this marks the beginning of the
!     next column.

      ja(1) = 1
      do 20 icol = 1, n-1
         do 10 j = ja(icol)+1, nelt
            if (ja(j).ne.icol) then
               ja(icol+1) = j
               goto 20
            endif
 10      continue
 20   continue
      ja(n+1) = nelt+1

!     mark the n+2 element so that future calls to a slap routine
!     utilizing the ysmp-column storage format will be able to tell.

      do icol=n+2,nelt
        ja(icol) = 0
      enddo

!     now loop thru the ia(i) array making sure that the diagonal
!     matrix element appears first in the column.  then sort the
!     rest of the column in ascending order.

      do 70 icol = 1, n
         ibgn = ja(icol)
         iend = ja(icol+1)-1
         do 30 i = ibgn, iend
            if (ia(i).eq.icol) then
!         swap the diag element with the first element in the column.
               itemp = ia(i)
               ia(i) = ia(ibgn)
               ia(ibgn) = itemp

               ktemp = ik(i)
               ik(i) = ik(ibgn)
               ik(ibgn) = ktemp

               temp = a(i)
               a(i) = a(ibgn)
               a(ibgn) = temp
               goto 40
            endif
 30      continue
 40      ibgn = ibgn + 1
         if (ibgn.lt.iend) then
            do 60 i = ibgn, iend
               do 50 j = i+1, iend
                  if (ia(i).gt.ia(j)) then
                     itemp = ia(i)
                     ia(i) = ia(j)
                     ia(j) = itemp

                     ktemp = ik(i)
                     ik(i) = ik(j)
                     ik(j) = ktemp

                     temp = a(i)
                     a(i) = a(j)
                     a(j) = temp
                  endif
 50            continue
 60         continue
         endif
 70   continue

      return
      end

!=======================================================================
!=======================================================================
      subroutine slap_qs2i1r (ia, ja, ik, a, n, kflag)
!=======================================================================

      implicit none

!***purpose  sort an integer array also moving an integer and real array
!            this routine sorts the integer array ia and makes the same
!            interchanges in the integer array ja and the real array a.
!            the array ia may be sorted in increasing order or decreas-
!            ing order. a slightly modified quicksort algorithm is used.

!     description of parameters
!        ia - integer array of values to be sorted.
!        ja - integer array to be carried along.
!         a - real array to be carried along.
!         n - number of values in integer array ia to be sorted.
!     kflag - control parameter
!           = 1 means sort ia in increasing order.
!           =-1 means sort ia in decreasing order.

      dimension il(21),iu(21)
      integer   ia(n), ja(n), ik(n), it, iit, jt, jjt, k, kt, ikt
      integer   kflag, ij, iu, il, l, j, nn, n, m, i, kk
      real      a(n), ta, tta, r

      nn = n
      if (n .eq. 1) return
      kk = iabs(kflag)

!     alter array ia to get decreasing order if needed.

      if (kflag .lt. 1) then
        do i=1,nn
          ia(i) = -ia(i)
        enddo
      endif

!     sort ia and carry "ja, ik and a" along.
!     and now...just a little black magic...
      m = 1
      i = 1
      j = nn
      r = .375
 210  if (r .le. 0.5898437) then
        r = r + 3.90625e-2
      else
        r = r-.21875
      endif
 225  k = i

!     select a central element of the array and save it in location
!     it, jt, kt, ta.

      ij = i + ifix (float (j-i) *r)
      it = ia(ij)
      jt = ja(ij)
      kt = ik(ij)
      ta = a(ij)

!     if first element of array is greater than it, interchange with it.

      if (ia(i) .gt. it) then
        ia(ij) = ia(i)
        ia(i)  = it
        it     = ia(ij)

        ja(ij) = ja(i)
        ja(i)  = jt
        jt     = ja(ij)

        ik(ij) = ik(i)
        ik(i)  = kt
        kt     = ik(ij)

        a(ij)  = a(i)
        a(i)   = ta
        ta     = a(ij)
      endif
      l=j

!     if last element of array is less than it, swap with it.

      if (ia(j) .lt. it) then
        ia(ij) = ia(j)
        ia(j)  = it
        it     = ia(ij)

        ja(ij) = ja(j)
        ja(j)  = jt
        jt     = ja(ij)

        ik(ij) = ik(j)
        ik(j)  = kt
        kt     = ik(ij)

        a(ij)  = a(j)
        a(j)   = ta
        ta     = a(ij)

!     if first element of array is greater than it, swap with it.

        if (ia(i) .gt. it) then
          ia(ij) = ia(i)
          ia(i)  = it
          it     = ia(ij)

          ja(ij) = ja(i)
          ja(i)  = jt
          jt     = ja(ij)

          ik(ij) = ik(i)
          ik(i)  = kt
          kt     = ik(ij)

          a(ij)  = a(i)
          a(i)   = ta
          ta     = a(ij)
        endif
      endif

!     find an element in the second half of the array which is
!     smaller than it.

  240 l=l-1
      if (ia(l) .gt. it) go to 240

!     find an element in the first half of the array which is
!     greater than it.

  245 k=k+1
      if (ia(k) .lt. it) go to 245

!     interchange these elements.

      if (k .le. l) then
        iit   = ia(l)
        ia(l) = ia(k)
        ia(k) = iit

        jjt   = ja(l)
        ja(l) = ja(k)
        ja(k) = jjt

        ikt   = ik(l)
        ik(l) = ik(k)
        ik(k) = ikt

        tta   = a(l)
        a(l)  = a(k)
        a(k)  = tta
        goto 240
      endif

!     save upper and lower subscripts of the array yet to be sorted.

      if (l-i .gt. j-k) then
         il(m) = i
         iu(m) = l
         i = k
         m = m+1
      else
         il(m) = k
         iu(m) = j
         j = l
         m = m+1
      endif
      go to 260

!     begin again on another portion of the unsorted array.

  255 m = m-1
      if (m .eq. 0) go to 300
      i = il(m)
      j = iu(m)
  260 if (j-i .ge. 1) go to 225
      if (i .eq. j) go to 255
      if (i .eq. 1) go to 210
      i = i-1
  265 i = i+1
      if (i .eq. j) go to 255
      it = ia(i+1)
      jt = ja(i+1)
      kt = ik(i+1)
      ta =  a(i+1)
      if (ia(i) .le. it) go to 265
      k=i
  270 ia(k+1) = ia(k)
      ja(k+1) = ja(k)
      ik(k+1) = ik(k)
      a(k+1)  =  a(k)
      k = k-1
      if (it .lt. ia(k)) go to 270
      ia(k+1) = it
      ja(k+1) = jt
      ik(k+1) = kt
      a(k+1)  = ta
      go to 265

!     clean up, if necessary.

  300 if (kflag .lt. 1) then
        do i=1,nn
          ia(i) = -ia(i)
        enddo
      endif

      return
      end
