!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief routines that build the integrals of the Vxc potential calculated
!>      for the atomic density in the basis set of spherical primitives
! **************************************************************************************************
MODULE qs_vxc_atom
   USE atomic_kind_types,               ONLY: atomic_kind_type,&
                                              get_atomic_kind
   USE basis_set_types,                 ONLY: get_gto_basis_set,&
                                              gto_basis_set_type
   USE cp_control_types,                ONLY: dft_control_type
   USE input_constants,                 ONLY: xc_none
   USE input_section_types,             ONLY: section_vals_get_subs_vals,&
                                              section_vals_type,&
                                              section_vals_val_get
   USE kinds,                           ONLY: dp
   USE memory_utilities,                ONLY: reallocate
   USE message_passing,                 ONLY: mp_para_env_type
   USE orbital_pointers,                ONLY: indso,&
                                              nsoset
   USE paw_basis_types,                 ONLY: get_paw_basis_info
   USE qs_environment_types,            ONLY: get_qs_env,&
                                              qs_environment_type
   USE qs_grid_atom,                    ONLY: grid_atom_type
   USE qs_harmonics_atom,               ONLY: get_none0_cg_list,&
                                              harmonics_atom_type
   USE qs_kind_types,                   ONLY: get_qs_kind,&
                                              has_nlcc,&
                                              qs_kind_type
   USE qs_linres_types,                 ONLY: nablavks_atom_type
   USE qs_rho_atom_types,               ONLY: get_rho_atom,&
                                              rho_atom_coeff,&
                                              rho_atom_type
   USE util,                            ONLY: get_limit
   USE xc_atom,                         ONLY: fill_rho_set,&
                                              vxc_of_r_new,&
                                              xc_2nd_deriv_of_r,&
                                              xc_rho_set_atom_update
   USE xc_derivative_set_types,         ONLY: xc_derivative_set_type,&
                                              xc_dset_create,&
                                              xc_dset_release,&
                                              xc_dset_zero_all
   USE xc_derivatives,                  ONLY: xc_functionals_get_needs
   USE xc_rho_cflags_types,             ONLY: xc_rho_cflags_type
   USE xc_rho_set_types,                ONLY: xc_rho_set_create,&
                                              xc_rho_set_release,&
                                              xc_rho_set_type
#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_vxc_atom'

   PUBLIC :: calculate_vxc_atom, &
             calculate_xc_2nd_deriv_atom, &
             calc_rho_angular, &
             calculate_gfxc_atom, &
             gfxc_atom_diff, &
             gaVxcgb_noGC

CONTAINS

! **************************************************************************************************
!> \brief ...
!> \param qs_env ...
!> \param energy_only ...
!> \param exc1 the on-body ex energy contribution
!> \param gradient_atom_set ...
!> \param adiabatic_rescale_factor ...
!> \param kind_set_external provides a non-default kind_set to use
!> \param rho_atom_set_external provides a non-default atomic density set to use
!> \param xc_section_external provides an external non-default XC
! **************************************************************************************************
   SUBROUTINE calculate_vxc_atom(qs_env, energy_only, exc1, gradient_atom_set, &
                                 adiabatic_rescale_factor, kind_set_external, &
                                 rho_atom_set_external, xc_section_external)

      TYPE(qs_environment_type), POINTER                 :: qs_env
      LOGICAL, INTENT(IN)                                :: energy_only
      REAL(dp), INTENT(INOUT)                            :: exc1
      TYPE(nablavks_atom_type), DIMENSION(:), OPTIONAL, &
         POINTER                                         :: gradient_atom_set
      REAL(dp), INTENT(IN), OPTIONAL                     :: adiabatic_rescale_factor
      TYPE(qs_kind_type), DIMENSION(:), OPTIONAL, &
         POINTER                                         :: kind_set_external
      TYPE(rho_atom_type), DIMENSION(:), OPTIONAL, &
         POINTER                                         :: rho_atom_set_external
      TYPE(section_vals_type), OPTIONAL, POINTER         :: xc_section_external

      CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_vxc_atom'

      INTEGER                                            :: bo(2), handle, ia, iat, iatom, idir, &
                                                            ikind, ir, ispin, myfun, na, natom, &
                                                            nr, nspins, num_pe
      INTEGER, DIMENSION(2, 3)                           :: bounds
      INTEGER, DIMENSION(:), POINTER                     :: atom_list
      LOGICAL                                            :: donlcc, epr_xc, gradient_f, lsd, nlcc, &
                                                            paw_atom, tau_f
      REAL(dp)                                           :: density_cut, exc_h, exc_s, gradient_cut, &
                                                            my_adiabatic_rescale_factor, tau_cut
      REAL(dp), DIMENSION(1, 1, 1)                       :: tau_d
      REAL(dp), DIMENSION(1, 1, 1, 1)                    :: rho_d
      REAL(dp), DIMENSION(:, :), POINTER                 :: rho_nlcc, weight
      REAL(dp), DIMENSION(:, :, :), POINTER              :: rho_h, rho_s, tau_h, tau_s, vtau_h, &
                                                            vtau_s, vxc_h, vxc_s
      REAL(dp), DIMENSION(:, :, :, :), POINTER           :: drho_h, drho_s, vxg_h, vxg_s
      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: my_kind_set
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: dr_h, dr_s, int_hh, int_ss, r_h, r_s
      TYPE(rho_atom_coeff), DIMENSION(:, :), POINTER     :: r_h_d, r_s_d
      TYPE(rho_atom_type), DIMENSION(:), POINTER         :: my_rho_atom_set
      TYPE(rho_atom_type), POINTER                       :: rho_atom
      TYPE(section_vals_type), POINTER                   :: input, my_xc_section, xc_fun_section
      TYPE(xc_derivative_set_type)                       :: deriv_set
      TYPE(xc_rho_cflags_type)                           :: needs
      TYPE(xc_rho_set_type)                              :: rho_set_h, rho_set_s

! -------------------------------------------------------------------------

      CALL timeset(routineN, handle)

      NULLIFY (atom_list)
      NULLIFY (my_kind_set)
      NULLIFY (atomic_kind_set)
      NULLIFY (grid_atom)
      NULLIFY (harmonics)
      NULLIFY (input)
      NULLIFY (para_env)
      NULLIFY (rho_atom)
      NULLIFY (my_rho_atom_set)
      NULLIFY (rho_nlcc)

      epr_xc = .FALSE.
      IF (PRESENT(gradient_atom_set)) THEN
         epr_xc = .TRUE.
      END IF

      IF (PRESENT(adiabatic_rescale_factor)) THEN
         my_adiabatic_rescale_factor = adiabatic_rescale_factor
      ELSE
         my_adiabatic_rescale_factor = 1.0_dp
      END IF

      CALL get_qs_env(qs_env=qs_env, &
                      dft_control=dft_control, &
                      para_env=para_env, &
                      atomic_kind_set=atomic_kind_set, &
                      qs_kind_set=my_kind_set, &
                      input=input, &
                      rho_atom_set=my_rho_atom_set)

      IF (PRESENT(kind_set_external)) my_kind_set => kind_set_external
      IF (PRESENT(rho_atom_set_external)) my_rho_atom_set => rho_atom_set_external

      nlcc = has_nlcc(my_kind_set)

      IF (epr_xc) THEN
         my_xc_section => section_vals_get_subs_vals(input, &
                                                     "PROPERTIES%LINRES%EPR%PRINT%G_TENSOR%XC")
      ELSE
         my_xc_section => section_vals_get_subs_vals(input, "DFT%XC")
      END IF

      IF (PRESENT(xc_section_external)) my_xc_section => xc_section_external

      xc_fun_section => section_vals_get_subs_vals(my_xc_section, "XC_FUNCTIONAL")
      CALL section_vals_val_get(xc_fun_section, "_SECTION_PARAMETERS_", &
                                i_val=myfun)

      IF (myfun == xc_none) THEN
         exc1 = 0.0_dp
         my_rho_atom_set(:)%exc_h = 0.0_dp
         my_rho_atom_set(:)%exc_s = 0.0_dp
      ELSE
         CALL section_vals_val_get(my_xc_section, "DENSITY_CUTOFF", &
                                   r_val=density_cut)
         CALL section_vals_val_get(my_xc_section, "GRADIENT_CUTOFF", &
                                   r_val=gradient_cut)
         CALL section_vals_val_get(my_xc_section, "TAU_CUTOFF", &
                                   r_val=tau_cut)

         lsd = dft_control%lsd
         nspins = dft_control%nspins
         needs = xc_functionals_get_needs(xc_fun_section, &
                                          lsd=lsd, &
                                          calc_potential=.TRUE.)

         ! whatever the xc, if epr_xc, drho_spin is needed
         IF (epr_xc) needs%drho_spin = .TRUE.

         gradient_f = (needs%drho .OR. needs%drho_spin)
         tau_f = (needs%tau .OR. needs%tau_spin)

         ! Initialize energy contribution from the one center XC terms to zero
         exc1 = 0.0_dp

         ! Nullify some pointers for work-arrays
         NULLIFY (rho_h, drho_h, rho_s, drho_s, weight)
         NULLIFY (vxc_h, vxc_s, vxg_h, vxg_s)
         NULLIFY (tau_h, tau_s)
         NULLIFY (vtau_h, vtau_s)

         ! Here starts the loop over all the atoms

         DO ikind = 1, SIZE(atomic_kind_set)
            CALL get_atomic_kind(atomic_kind_set(ikind), atom_list=atom_list, natom=natom)
            CALL get_qs_kind(my_kind_set(ikind), paw_atom=paw_atom, &
                             harmonics=harmonics, grid_atom=grid_atom)
            CALL get_qs_kind(my_kind_set(ikind), basis_set=basis_1c, basis_type="GAPW_1C")

            IF (.NOT. paw_atom) CYCLE

            nr = grid_atom%nr
            na = grid_atom%ng_sphere

            ! Prepare the structures needed to calculate and store the xc derivatives

            ! Array dimension: here anly one dimensional arrays are used,
            ! i.e. only the first column of deriv_data is read.
            ! The other to dimensions  are set to size equal 1
            bounds(1:2, 1:3) = 1
            bounds(2, 1) = na
            bounds(2, 2) = nr

            ! create a place where to put the derivatives
            CALL xc_dset_create(deriv_set, local_bounds=bounds)
            ! create the place where to store the argument for the functionals
            CALL xc_rho_set_create(rho_set_h, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
            CALL xc_rho_set_create(rho_set_s, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)

            ! allocate the required 3d arrays where to store rho and drho
            CALL xc_rho_set_atom_update(rho_set_h, needs, nspins, bounds)
            CALL xc_rho_set_atom_update(rho_set_s, needs, nspins, bounds)

            CALL reallocate(rho_h, 1, na, 1, nr, 1, nspins)
            CALL reallocate(rho_s, 1, na, 1, nr, 1, nspins)
            weight => grid_atom%weight
            CALL reallocate(vxc_h, 1, na, 1, nr, 1, nspins)
            CALL reallocate(vxc_s, 1, na, 1, nr, 1, nspins)
            !
            IF (gradient_f) THEN
               CALL reallocate(drho_h, 1, 4, 1, na, 1, nr, 1, nspins)
               CALL reallocate(drho_s, 1, 4, 1, na, 1, nr, 1, nspins)
               CALL reallocate(vxg_h, 1, 3, 1, na, 1, nr, 1, nspins)
               CALL reallocate(vxg_s, 1, 3, 1, na, 1, nr, 1, nspins)
            END IF

            IF (tau_f) THEN
               CALL reallocate(tau_h, 1, na, 1, nr, 1, nspins)
               CALL reallocate(tau_s, 1, na, 1, nr, 1, nspins)
               CALL reallocate(vtau_h, 1, na, 1, nr, 1, nspins)
               CALL reallocate(vtau_s, 1, na, 1, nr, 1, nspins)
            END IF

            ! NLCC: prepare rho and drho of the core charge for this KIND
            donlcc = .FALSE.
            IF (nlcc) THEN
               NULLIFY (rho_nlcc)
               rho_nlcc => my_kind_set(ikind)%nlcc_pot
               IF (ASSOCIATED(rho_nlcc)) donlcc = .TRUE.
            END IF

            ! Distribute the atoms of this kind

            num_pe = para_env%num_pe
            bo = get_limit(natom, para_env%num_pe, para_env%mepos)

            DO iat = bo(1), bo(2)
               iatom = atom_list(iat)

               my_rho_atom_set(iatom)%exc_h = 0.0_dp
               my_rho_atom_set(iatom)%exc_s = 0.0_dp

               rho_atom => my_rho_atom_set(iatom)
               rho_h = 0.0_dp
               rho_s = 0.0_dp
               IF (gradient_f) THEN
                  NULLIFY (r_h, r_s, dr_h, dr_s, r_h_d, r_s_d)
                  CALL get_rho_atom(rho_atom=rho_atom, rho_rad_h=r_h, &
                                    rho_rad_s=r_s, drho_rad_h=dr_h, &
                                    drho_rad_s=dr_s, rho_rad_h_d=r_h_d, &
                                    rho_rad_s_d=r_s_d)
                  drho_h = 0.0_dp
                  drho_s = 0.0_dp
               ELSE
                  NULLIFY (r_h, r_s)
                  CALL get_rho_atom(rho_atom=rho_atom, rho_rad_h=r_h, rho_rad_s=r_s)
                  rho_d = 0.0_dp
               END IF
               IF (tau_f) THEN
                  !compute tau on the grid all at once
                  CALL calc_tau_atom(tau_h, tau_s, rho_atom, my_kind_set(ikind), nspins)
               ELSE
                  tau_d = 0.0_dp
               END IF

               DO ir = 1, nr
                  CALL calc_rho_angular(grid_atom, harmonics, nspins, gradient_f, &
                                        ir, r_h, r_s, rho_h, rho_s, dr_h, dr_s, &
                                        r_h_d, r_s_d, drho_h, drho_s)
                  IF (donlcc) THEN
                     CALL calc_rho_nlcc(grid_atom, nspins, gradient_f, &
                                        ir, rho_nlcc(:, 1), rho_h, rho_s, rho_nlcc(:, 2), drho_h, drho_s)
                  END IF
               END DO
               DO ir = 1, nr
                  IF (tau_f) THEN
                     CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, drho_h, tau_h, na, ir)
                     CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, drho_s, tau_s, na, ir)
                  ELSE IF (gradient_f) THEN
                     CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, drho_h, tau_d, na, ir)
                     CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, drho_s, tau_d, na, ir)
                  ELSE
                     CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, rho_d, tau_d, na, ir)
                     CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, rho_d, tau_d, na, ir)
                  END IF
               END DO

               !-------------------!
               ! hard atom density !
               !-------------------!
               CALL xc_dset_zero_all(deriv_set)
               CALL vxc_of_r_new(xc_fun_section, rho_set_h, deriv_set, 1, needs, weight, &
                                 lsd, na, nr, exc_h, vxc_h, vxg_h, vtau_h, energy_only=energy_only, &
                                 epr_xc=epr_xc, adiabatic_rescale_factor=my_adiabatic_rescale_factor)
               rho_atom%exc_h = rho_atom%exc_h + exc_h

               !-------------------!
               ! soft atom density !
               !-------------------!
               CALL xc_dset_zero_all(deriv_set)
               CALL vxc_of_r_new(xc_fun_section, rho_set_s, deriv_set, 1, needs, weight, &
                                 lsd, na, nr, exc_s, vxc_s, vxg_s, vtau_s, energy_only=energy_only, &
                                 epr_xc=epr_xc, adiabatic_rescale_factor=my_adiabatic_rescale_factor)
               rho_atom%exc_s = rho_atom%exc_s + exc_s

               IF (epr_xc) THEN
                  DO ispin = 1, nspins
                     DO idir = 1, 3
                        DO ir = 1, nr
                           DO ia = 1, na
                              gradient_atom_set(iatom)%nablavks_vec_rad_h(idir, ispin)%r_coef(ir, ia) = &
                                 gradient_atom_set(iatom)%nablavks_vec_rad_h(idir, ispin)%r_coef(ir, ia) &
                                 + vxg_h(idir, ia, ir, ispin)
                              gradient_atom_set(iatom)%nablavks_vec_rad_s(idir, ispin)%r_coef(ir, ia) = &
                                 gradient_atom_set(iatom)%nablavks_vec_rad_s(idir, ispin)%r_coef(ir, ia) &
                                 + vxg_s(idir, ia, ir, ispin)
                           END DO ! ia
                        END DO ! ir
                     END DO ! idir
                  END DO ! ispin
               END IF

               ! Add contributions to the exc energy

               exc1 = exc1 + rho_atom%exc_h - rho_atom%exc_s

               ! Integration to get the matrix elements relative to the vxc_atom
               ! here the products with the primitives is done: gaVxcgb
               ! internal transformation to get the integral in cartesian Gaussians

               IF (.NOT. energy_only) THEN
                  NULLIFY (int_hh, int_ss)
                  CALL get_rho_atom(rho_atom=rho_atom, ga_Vlocal_gb_h=int_hh, ga_Vlocal_gb_s=int_ss)
                  IF (gradient_f) THEN
                     CALL gaVxcgb_GC(vxc_h, vxc_s, vxg_h, vxg_s, int_hh, int_ss, &
                                     grid_atom, basis_1c, harmonics, nspins)
                  ELSE
                     CALL gaVxcgb_noGC(vxc_h, vxc_s, int_hh, int_ss, &
                                       grid_atom, basis_1c, harmonics, nspins)
                  END IF
                  IF (tau_f) THEN
                     CALL dgaVtaudgb(vtau_h, vtau_s, int_hh, int_ss, &
                                     grid_atom, basis_1c, harmonics, nspins)
                  END IF
               END IF ! energy_only
               NULLIFY (r_h, r_s, dr_h, dr_s)
            END DO ! iat

            ! Release the xc structure used to store the xc derivatives
            CALL xc_dset_release(deriv_set)
            CALL xc_rho_set_release(rho_set_h)
            CALL xc_rho_set_release(rho_set_s)

         END DO ! ikind

         CALL para_env%sum(exc1)

         IF (ASSOCIATED(rho_h)) DEALLOCATE (rho_h)
         IF (ASSOCIATED(rho_s)) DEALLOCATE (rho_s)
         IF (ASSOCIATED(vxc_h)) DEALLOCATE (vxc_h)
         IF (ASSOCIATED(vxc_s)) DEALLOCATE (vxc_s)

         IF (gradient_f) THEN
            IF (ASSOCIATED(drho_h)) DEALLOCATE (drho_h)
            IF (ASSOCIATED(drho_s)) DEALLOCATE (drho_s)
            IF (ASSOCIATED(vxg_h)) DEALLOCATE (vxg_h)
            IF (ASSOCIATED(vxg_s)) DEALLOCATE (vxg_s)
         END IF

         IF (tau_f) THEN
            IF (ASSOCIATED(tau_h)) DEALLOCATE (tau_h)
            IF (ASSOCIATED(tau_s)) DEALLOCATE (tau_s)
            IF (ASSOCIATED(vtau_h)) DEALLOCATE (vtau_h)
            IF (ASSOCIATED(vtau_s)) DEALLOCATE (vtau_s)
         END IF

      END IF !xc_none

      CALL timestop(handle)

   END SUBROUTINE calculate_vxc_atom

! **************************************************************************************************
!> \brief ...
!> \param rho_atom_set ...
!> \param rho1_atom_set ...
!> \param qs_env ...
!> \param xc_section ...
!> \param para_env ...
!> \param do_tddfpt2 New implementation of TDDFT.
!> \param do_triplet ...
!> \param kind_set_external ...
! **************************************************************************************************
   SUBROUTINE calculate_xc_2nd_deriv_atom(rho_atom_set, rho1_atom_set, qs_env, xc_section, para_env, &
                                          do_tddfpt2, do_triplet, kind_set_external)

      TYPE(rho_atom_type), DIMENSION(:), POINTER         :: rho_atom_set, rho1_atom_set
      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(section_vals_type), POINTER                   :: xc_section
      TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
      LOGICAL, INTENT(IN), OPTIONAL                      :: do_tddfpt2, do_triplet
      TYPE(qs_kind_type), DIMENSION(:), OPTIONAL, &
         POINTER                                         :: kind_set_external

      CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_xc_2nd_deriv_atom'

      INTEGER                                            :: atom, handle, iatom, ikind, ir, na, &
                                                            natom, nr, nspins
      INTEGER, DIMENSION(2)                              :: local_loop_limit
      INTEGER, DIMENSION(2, 3)                           :: bounds
      INTEGER, DIMENSION(:), POINTER                     :: atom_list
      LOGICAL                                            :: gradient_functional, lsd, paw_atom, &
                                                            scale_rho, tau_f
      REAL(KIND=dp)                                      :: density_cut, gradient_cut, rtot, tau_cut
      REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
         POINTER                                         :: vxc_h, vxc_s
      REAL(KIND=dp), DIMENSION(1, 1, 1)                  :: rtau
      REAL(KIND=dp), DIMENSION(1, 1, 1, 1)               :: rrho
      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: weight
      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: rho1_h, rho1_s, rho_h, rho_s, tau1_h, &
                                                            tau1_s, tau_h, tau_s
      REAL(KIND=dp), DIMENSION(:, :, :, :), POINTER      :: drho1_h, drho1_s, drho_h, drho_s, vxg_h, &
                                                            vxg_s
      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: my_kind_set, qs_kind_set
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: dr1_h, dr1_s, dr_h, dr_s, int_hh, &
                                                            int_ss, r1_h, r1_s, r_h, r_s
      TYPE(rho_atom_coeff), DIMENSION(:, :), POINTER     :: r1_h_d, r1_s_d, r_h_d, r_s_d
      TYPE(rho_atom_type), POINTER                       :: rho1_atom, rho_atom
      TYPE(section_vals_type), POINTER                   :: input, xc_fun_section
      TYPE(xc_derivative_set_type)                       :: deriv_set
      TYPE(xc_rho_cflags_type)                           :: needs
      TYPE(xc_rho_set_type)                              :: rho1_set_h, rho1_set_s, rho_set_h, &
                                                            rho_set_s

! -------------------------------------------------------------------------

      CALL timeset(routineN, handle)

      NULLIFY (qs_kind_set)
      NULLIFY (rho_h, rho_s, drho_h, drho_s, weight)
      NULLIFY (rho1_h, rho1_s, drho1_h, drho1_s)
      NULLIFY (vxc_h, vxc_s, vxg_h, vxg_s)
      NULLIFY (tau_h, tau_s, tau1_h, tau1_s)

      CALL get_qs_env(qs_env=qs_env, &
                      input=input, &
                      qs_kind_set=qs_kind_set, &
                      atomic_kind_set=atomic_kind_set)

      IF (PRESENT(kind_set_external)) THEN
         my_kind_set => kind_set_external
      ELSE
         my_kind_set => qs_kind_set
      END IF

      CALL section_vals_val_get(input, "DFT%LSD", l_val=lsd)
      CALL section_vals_val_get(xc_section, "DENSITY_CUTOFF", &
                                r_val=density_cut)
      CALL section_vals_val_get(xc_section, "GRADIENT_CUTOFF", &
                                r_val=gradient_cut)
      CALL section_vals_val_get(xc_section, "TAU_CUTOFF", &
                                r_val=tau_cut)

      xc_fun_section => section_vals_get_subs_vals(xc_section, &
                                                   "XC_FUNCTIONAL")
      IF (lsd) THEN
         nspins = 2
      ELSE
         nspins = 1
      END IF

      scale_rho = .FALSE.
      IF (PRESENT(do_tddfpt2) .AND. PRESENT(do_triplet)) THEN
         IF (nspins == 1 .AND. do_triplet) THEN
            lsd = .TRUE.
            scale_rho = .TRUE.
         END IF
      ELSEIF (PRESENT(do_triplet)) THEN
         IF (nspins == 1 .AND. do_triplet) lsd = .TRUE.
      END IF

      needs = xc_functionals_get_needs(xc_fun_section, lsd=lsd, &
                                       calc_potential=.TRUE.)
      gradient_functional = needs%drho .OR. needs%drho_spin
      tau_f = (needs%tau .OR. needs%tau_spin)
      IF (tau_f) THEN
         CPABORT("Tau functionals not implemented for GAPW 2nd derivatives")
      ELSE
         rtau = 0.0_dp
      END IF

      !  Here starts the loop over all the atoms
      DO ikind = 1, SIZE(atomic_kind_set)

         NULLIFY (atom_list, harmonics, grid_atom)
         CALL get_atomic_kind(atomic_kind_set(ikind), atom_list=atom_list, natom=natom)
         CALL get_qs_kind(my_kind_set(ikind), paw_atom=paw_atom, &
                          harmonics=harmonics, grid_atom=grid_atom)
         CALL get_qs_kind(my_kind_set(ikind), basis_set=basis_1c, basis_type="GAPW_1C")
         IF (.NOT. paw_atom) CYCLE

         nr = grid_atom%nr
         na = grid_atom%ng_sphere

         ! Array dimension: here anly one dimensional arrays are used,
         ! i.e. only the first column of deriv_data is read.
         ! The other to dimensions  are set to size equal 1.
         bounds(1:2, 1:3) = 1
         bounds(2, 1) = na
         bounds(2, 2) = nr

         CALL xc_dset_create(deriv_set, local_bounds=bounds)
         CALL xc_rho_set_create(rho_set_h, bounds, rho_cutoff=density_cut, &
                                drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
         CALL xc_rho_set_create(rho_set_s, bounds, rho_cutoff=density_cut, &
                                drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
         CALL xc_rho_set_create(rho1_set_h, bounds, rho_cutoff=density_cut, &
                                drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
         CALL xc_rho_set_create(rho1_set_s, bounds, rho_cutoff=density_cut, &
                                drho_cutoff=gradient_cut, tau_cutoff=tau_cut)

         ! allocate the required 3d arrays where to store rho and drho
         IF (nspins == 1 .AND. .NOT. lsd) THEN
            CALL xc_rho_set_atom_update(rho_set_h, needs, 1, bounds)
            CALL xc_rho_set_atom_update(rho1_set_h, needs, 1, bounds)
            CALL xc_rho_set_atom_update(rho_set_s, needs, 1, bounds)
            CALL xc_rho_set_atom_update(rho1_set_s, needs, 1, bounds)
         ELSE
            CALL xc_rho_set_atom_update(rho_set_h, needs, 2, bounds)
            CALL xc_rho_set_atom_update(rho1_set_h, needs, 2, bounds)
            CALL xc_rho_set_atom_update(rho_set_s, needs, 2, bounds)
            CALL xc_rho_set_atom_update(rho1_set_s, needs, 2, bounds)
         END IF

         ALLOCATE (rho_h(1:na, 1:nr, 1:nspins), rho1_h(1:na, 1:nr, 1:nspins), &
                   rho_s(1:na, 1:nr, 1:nspins), rho1_s(1:na, 1:nr, 1:nspins))

         ALLOCATE (vxc_h(1:na, 1:nr, 1:nspins), vxc_s(1:na, 1:nr, 1:nspins))
         vxc_h = 0.0_dp
         vxc_s = 0.0_dp

         weight => grid_atom%weight

         IF (gradient_functional) THEN
            ALLOCATE (drho_h(1:4, 1:na, 1:nr, 1:nspins), drho1_h(1:4, 1:na, 1:nr, 1:nspins), &
                      drho_s(1:4, 1:na, 1:nr, 1:nspins), drho1_s(1:4, 1:na, 1:nr, 1:nspins))
            ALLOCATE (vxg_h(1:3, 1:na, 1:nr, 1:nspins), vxg_s(1:3, 1:na, 1:nr, 1:nspins))
         ELSE
            ALLOCATE (drho_h(1, 1, 1, 1), drho1_h(1, 1, 1, 1), &
                      drho_s(1, 1, 1, 1), drho1_s(1, 1, 1, 1))
            ALLOCATE (vxg_h(1, 1, 1, 1), vxg_s(1, 1, 1, 1))
            rrho = 0.0_dp
         END IF
         vxg_h = 0.0_dp
         vxg_s = 0.0_dp

         ! parallelization
         local_loop_limit = get_limit(natom, para_env%num_pe, para_env%mepos)

         DO iatom = local_loop_limit(1), local_loop_limit(2) !1,natom
            atom = atom_list(iatom)

            rho_atom_set(atom)%exc_h = 0.0_dp
            rho_atom_set(atom)%exc_s = 0.0_dp
            rho1_atom_set(atom)%exc_h = 0.0_dp
            rho1_atom_set(atom)%exc_s = 0.0_dp

            rho_atom => rho_atom_set(atom)
            rho1_atom => rho1_atom_set(atom)
            NULLIFY (r_h, r_s, dr_h, dr_s, r_h_d, r_s_d)
            NULLIFY (r1_h, r1_s, dr1_h, dr1_s, r1_h_d, r1_s_d)
            rho_h = 0.0_dp
            rho_s = 0.0_dp
            rho1_h = 0.0_dp
            rho1_s = 0.0_dp
            IF (gradient_functional) THEN
               CALL get_rho_atom(rho_atom=rho_atom, &
                                 rho_rad_h=r_h, rho_rad_s=r_s, &
                                 drho_rad_h=dr_h, drho_rad_s=dr_s, &
                                 rho_rad_h_d=r_h_d, rho_rad_s_d=r_s_d)
               CALL get_rho_atom(rho_atom=rho1_atom, &
                                 rho_rad_h=r1_h, rho_rad_s=r1_s, &
                                 drho_rad_h=dr1_h, drho_rad_s=dr1_s, &
                                 rho_rad_h_d=r1_h_d, rho_rad_s_d=r1_s_d)
               drho_h = 0.0_dp; drho_s = 0.0_dp
               drho1_h = 0.0_dp; drho1_s = 0.0_dp
            ELSE
               CALL get_rho_atom(rho_atom=rho_atom, &
                                 rho_rad_h=r_h, rho_rad_s=r_s)
               CALL get_rho_atom(rho_atom=rho1_atom, &
                                 rho_rad_h=r1_h, rho_rad_s=r1_s)
            END IF

            rtot = 0.0_dp

            DO ir = 1, nr
               CALL calc_rho_angular(grid_atom, harmonics, nspins, gradient_functional, &
                                     ir, r_h, r_s, rho_h, rho_s, dr_h, dr_s, r_h_d, r_s_d, &
                                     drho_h, drho_s)
               CALL calc_rho_angular(grid_atom, harmonics, nspins, gradient_functional, &
                                     ir, r1_h, r1_s, rho1_h, rho1_s, dr1_h, dr1_s, r1_h_d, r1_s_d, &
                                     drho1_h, drho1_s)
            END DO
            IF (scale_rho) THEN
               rho_h = 2.0_dp*rho_h
               rho_s = 2.0_dp*rho_s
               IF (gradient_functional) THEN
                  drho_h = 2.0_dp*drho_h
                  drho_s = 2.0_dp*drho_s
               END IF
            END IF

            DO ir = 1, nr
               IF (tau_f) THEN
                  CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, drho_h, tau_h, na, ir)
                  CALL fill_rho_set(rho1_set_h, lsd, nspins, needs, rho1_h, drho1_h, tau1_h, na, ir)
                  CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, drho_s, tau_s, na, ir)
                  CALL fill_rho_set(rho1_set_s, lsd, nspins, needs, rho1_s, drho1_s, tau1_s, na, ir)
               ELSE IF (gradient_functional) THEN
                  CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, drho_h, rtau, na, ir)
                  CALL fill_rho_set(rho1_set_h, lsd, nspins, needs, rho1_h, drho1_h, rtau, na, ir)
                  CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, drho_s, rtau, na, ir)
                  CALL fill_rho_set(rho1_set_s, lsd, nspins, needs, rho1_s, drho1_s, rtau, na, ir)
               ELSE
                  CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, rrho, rtau, na, ir)
                  CALL fill_rho_set(rho1_set_h, lsd, nspins, needs, rho1_h, rrho, rtau, na, ir)
                  CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, rrho, rtau, na, ir)
                  CALL fill_rho_set(rho1_set_s, lsd, nspins, needs, rho1_s, rrho, rtau, na, ir)
               END IF
            END DO

            CALL xc_2nd_deriv_of_r(xc_section=xc_section, &
                                   rho_set=rho_set_h, rho1_set=rho1_set_h, &
                                   deriv_set=deriv_set, &
                                   w=weight, vxc=vxc_h, vxg=vxg_h, do_triplet=do_triplet)
            CALL xc_2nd_deriv_of_r(xc_section=xc_section, &
                                   rho_set=rho_set_s, rho1_set=rho1_set_s, &
                                   deriv_set=deriv_set, &
                                   w=weight, vxc=vxc_s, vxg=vxg_s, do_triplet=do_triplet)

            CALL get_rho_atom(rho_atom=rho1_atom, ga_Vlocal_gb_h=int_hh, ga_Vlocal_gb_s=int_ss)
            IF (gradient_functional) THEN
               CALL gaVxcgb_GC(vxc_h, vxc_s, vxg_h, vxg_s, int_hh, int_ss, &
                               grid_atom, basis_1c, harmonics, nspins)
            ELSE
               CALL gaVxcgb_noGC(vxc_h, vxc_s, int_hh, int_ss, &
                                 grid_atom, basis_1c, harmonics, nspins)
            END IF

            NULLIFY (r_h, r_s, dr_h, dr_s)

         END DO

         ! some cleanup
         NULLIFY (weight)
         DEALLOCATE (rho_h, rho_s, rho1_h, rho1_s, vxc_h, vxc_s)
         DEALLOCATE (drho_h, drho_s, vxg_h, vxg_s)
         DEALLOCATE (drho1_h, drho1_s)

         CALL xc_dset_release(deriv_set)
         CALL xc_rho_set_release(rho_set_h)
         CALL xc_rho_set_release(rho1_set_h)
         CALL xc_rho_set_release(rho_set_s)
         CALL xc_rho_set_release(rho1_set_s)

      END DO

      CALL timestop(handle)

   END SUBROUTINE calculate_xc_2nd_deriv_atom

! **************************************************************************************************
!> \brief ...
!> \param qs_env ...
!> \param rho0_atom_set ...
!> \param rho1_atom_set ...
!> \param rho2_atom_set ...
!> \param kind_set ...
!> \param xc_section ...
!> \param is_triplet ...
!> \param accuracy ...
! **************************************************************************************************
   SUBROUTINE calculate_gfxc_atom(qs_env, rho0_atom_set, rho1_atom_set, rho2_atom_set, &
                                  kind_set, xc_section, is_triplet, accuracy)

      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(rho_atom_type), DIMENSION(:), POINTER         :: rho0_atom_set, rho1_atom_set, &
                                                            rho2_atom_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: kind_set
      TYPE(section_vals_type), OPTIONAL, POINTER         :: xc_section
      LOGICAL, INTENT(IN)                                :: is_triplet
      INTEGER, INTENT(IN)                                :: accuracy

      CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_gfxc_atom'
      REAL(KIND=dp), PARAMETER                           :: epsrho = 5.e-4_dp

      INTEGER                                            :: bo(2), handle, iat, iatom, ikind, ir, &
                                                            istep, mspins, myfun, na, natom, nf, &
                                                            nr, ns, nspins, nstep, num_pe
      INTEGER, DIMENSION(2, 3)                           :: bounds
      INTEGER, DIMENSION(:), POINTER                     :: atom_list
      LOGICAL                                            :: donlcc, gradient_f, lsd, nlcc, paw_atom, &
                                                            tau_f
      REAL(dp)                                           :: beta, density_cut, exc_h, exc_s, &
                                                            gradient_cut, oeps1, oeps2, tau_cut
      REAL(dp), DIMENSION(1, 1, 1)                       :: tau_d
      REAL(dp), DIMENSION(1, 1, 1, 1)                    :: rho_d
      REAL(dp), DIMENSION(:, :), POINTER                 :: rho_nlcc, weight
      REAL(dp), DIMENSION(:, :, :), POINTER              :: rho0_h, rho0_s, rho1_h, rho1_s, rho_h, &
                                                            rho_s, tau0_h, tau0_s, tau1_h, tau1_s, &
                                                            tau_h, tau_s, vtau_h, vtau_s, vxc_h, &
                                                            vxc_s
      REAL(dp), DIMENSION(:, :, :, :), POINTER           :: drho0_h, drho0_s, drho1_h, drho1_s, &
                                                            drho_h, drho_s, vxg_h, vxg_s
      REAL(KIND=dp), DIMENSION(-4:4)                     :: ak, bl
      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: dr_h, dr_s, fint_hh, fint_ss, int_hh, &
                                                            int_ss, r_h, r_s
      TYPE(rho_atom_coeff), DIMENSION(:, :), POINTER     :: r_h_d, r_s_d
      TYPE(rho_atom_type), POINTER                       :: rho0_atom, rho1_atom, rho2_atom
      TYPE(section_vals_type), POINTER                   :: xc_fun_section
      TYPE(xc_derivative_set_type)                       :: deriv_set
      TYPE(xc_rho_cflags_type)                           :: needs
      TYPE(xc_rho_set_type)                              :: rho_set_h, rho_set_s

      CALL timeset(routineN, handle)

      ak = 0.0_dp
      bl = 0.0_dp
      SELECT CASE (accuracy)
      CASE (:4)
         nstep = 2
         ak(-2:2) = (/1.0_dp, -8.0_dp, 0.0_dp, 8.0_dp, -1.0_dp/)/12.0_dp
         bl(-2:2) = (/-1.0_dp, 16.0_dp, -30.0_dp, 16.0_dp, -1.0_dp/)/12.0_dp
      CASE (5:7)
         nstep = 3
         ak(-3:3) = (/-1.0_dp, 9.0_dp, -45.0_dp, 0.0_dp, 45.0_dp, -9.0_dp, 1.0_dp/)/60.0_dp
         bl(-3:3) = (/2.0_dp, -27.0_dp, 270.0_dp, -490.0_dp, 270.0_dp, -27.0_dp, 2.0_dp/)/180.0_dp
      CASE (8:)
         nstep = 4
         ak(-4:4) = (/1.0_dp, -32.0_dp/3.0_dp, 56.0_dp, -224.0_dp, 0.0_dp, &
                      224.0_dp, -56.0_dp, 32.0_dp/3.0_dp, -1.0_dp/)/280.0_dp
         bl(-4:4) = (/-1.0_dp, 128.0_dp/9.0_dp, -112.0_dp, 896.0_dp, -14350.0_dp/9.0_dp, &
                      896.0_dp, -112.0_dp, 128.0_dp/9.0_dp, -1.0_dp/)/560.0_dp
      END SELECT
      oeps1 = 1.0_dp/epsrho
      oeps2 = 1.0_dp/(epsrho**2)

      CALL get_qs_env(qs_env=qs_env, &
                      dft_control=dft_control, &
                      para_env=para_env, &
                      atomic_kind_set=atomic_kind_set)

      xc_fun_section => section_vals_get_subs_vals(xc_section, "XC_FUNCTIONAL")
      CALL section_vals_val_get(xc_fun_section, "_SECTION_PARAMETERS_", i_val=myfun)

      IF (myfun == xc_none) THEN
         ! no action needed?
      ELSE
         CALL section_vals_val_get(xc_section, "DENSITY_CUTOFF", r_val=density_cut)
         CALL section_vals_val_get(xc_section, "GRADIENT_CUTOFF", r_val=gradient_cut)
         CALL section_vals_val_get(xc_section, "TAU_CUTOFF", r_val=tau_cut)

         nlcc = has_nlcc(kind_set)
         lsd = dft_control%lsd
         nspins = dft_control%nspins
         mspins = nspins
         IF (is_triplet) THEN
            CPASSERT(nspins == 1)
            lsd = .TRUE.
            mspins = 2
         END IF
         needs = xc_functionals_get_needs(xc_fun_section, lsd=lsd, calc_potential=.TRUE.)
         gradient_f = (needs%drho .OR. needs%drho_spin)
         tau_f = (needs%tau .OR. needs%tau_spin)

         ! Here starts the loop over all the atoms
         DO ikind = 1, SIZE(atomic_kind_set)
            CALL get_atomic_kind(atomic_kind_set(ikind), atom_list=atom_list, natom=natom)
            CALL get_qs_kind(kind_set(ikind), paw_atom=paw_atom, &
                             harmonics=harmonics, grid_atom=grid_atom)
            CALL get_qs_kind(kind_set(ikind), basis_set=basis_1c, basis_type="GAPW_1C")

            IF (.NOT. paw_atom) CYCLE

            nr = grid_atom%nr
            na = grid_atom%ng_sphere

            ! Prepare the structures needed to calculate and store the xc derivatives

            ! Array dimension: here anly one dimensional arrays are used,
            ! i.e. only the first column of deriv_data is read.
            ! The other to dimensions  are set to size equal 1
            bounds(1:2, 1:3) = 1
            bounds(2, 1) = na
            bounds(2, 2) = nr

            ! create a place where to put the derivatives
            CALL xc_dset_create(deriv_set, local_bounds=bounds)
            ! create the place where to store the argument for the functionals
            CALL xc_rho_set_create(rho_set_h, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
            CALL xc_rho_set_create(rho_set_s, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)

            ! allocate the required 3d arrays where to store rho and drho
            CALL xc_rho_set_atom_update(rho_set_h, needs, mspins, bounds)
            CALL xc_rho_set_atom_update(rho_set_s, needs, mspins, bounds)

            weight => grid_atom%weight

            ALLOCATE (rho_h(na, nr, mspins), rho_s(na, nr, mspins), &
                      rho0_h(na, nr, nspins), rho0_s(na, nr, nspins), &
                      rho1_h(na, nr, nspins), rho1_s(na, nr, nspins))
            ALLOCATE (vxc_h(na, nr, mspins), vxc_s(na, nr, mspins))
            IF (gradient_f) THEN
               ALLOCATE (drho_h(4, na, nr, mspins), drho_s(4, na, nr, mspins), &
                         drho0_h(4, na, nr, nspins), drho0_s(4, na, nr, nspins), &
                         drho1_h(4, na, nr, nspins), drho1_s(4, na, nr, nspins))
               ALLOCATE (vxg_h(3, na, nr, mspins), vxg_s(3, na, nr, mspins))
            END IF
            IF (tau_f) THEN
               ALLOCATE (tau_h(na, nr, mspins), tau_s(na, nr, mspins), &
                         tau0_h(na, nr, nspins), tau0_s(na, nr, nspins), &
                         tau1_h(na, nr, nspins), tau1_s(na, nr, nspins))
               ALLOCATE (vtau_h(na, nr, mspins), vtau_s(na, nr, mspins))
            END IF
            !
            ! NLCC: prepare rho and drho of the core charge for this KIND
            donlcc = .FALSE.
            IF (nlcc) THEN
               NULLIFY (rho_nlcc)
               rho_nlcc => kind_set(ikind)%nlcc_pot
               IF (ASSOCIATED(rho_nlcc)) donlcc = .TRUE.
            END IF

            ! Distribute the atoms of this kind
            num_pe = para_env%num_pe
            bo = get_limit(natom, num_pe, para_env%mepos)

            DO iat = bo(1), bo(2)
               iatom = atom_list(iat)
               !
               NULLIFY (int_hh, int_ss)
               rho0_atom => rho0_atom_set(iatom)
               CALL get_rho_atom(rho_atom=rho0_atom, ga_Vlocal_gb_h=int_hh, ga_Vlocal_gb_s=int_ss)
               ALLOCATE (fint_ss(nspins), fint_hh(nspins))
               DO ns = 1, nspins
                  nf = SIZE(int_ss(ns)%r_coef, 1)
                  ALLOCATE (fint_ss(ns)%r_coef(nf, nf))
                  nf = SIZE(int_hh(ns)%r_coef, 1)
                  ALLOCATE (fint_hh(ns)%r_coef(nf, nf))
               END DO

               ! RHO0
               rho0_h = 0.0_dp
               rho0_s = 0.0_dp
               rho0_atom => rho0_atom_set(iatom)
               IF (gradient_f) THEN
                  NULLIFY (r_h, r_s, dr_h, dr_s, r_h_d, r_s_d)
                  CALL get_rho_atom(rho_atom=rho0_atom, rho_rad_h=r_h, rho_rad_s=r_s, drho_rad_h=dr_h, &
                                    drho_rad_s=dr_s, rho_rad_h_d=r_h_d, rho_rad_s_d=r_s_d)
                  drho0_h = 0.0_dp
                  drho0_s = 0.0_dp
               ELSE
                  NULLIFY (r_h, r_s)
                  CALL get_rho_atom(rho_atom=rho0_atom, rho_rad_h=r_h, rho_rad_s=r_s)
                  rho_d = 0.0_dp
               END IF
               DO ir = 1, nr
                  CALL calc_rho_angular(grid_atom, harmonics, nspins, gradient_f, &
                                        ir, r_h, r_s, rho0_h, rho0_s, dr_h, dr_s, &
                                        r_h_d, r_s_d, drho0_h, drho0_s)
                  IF (donlcc) THEN
                     CALL calc_rho_nlcc(grid_atom, nspins, gradient_f, &
                                        ir, rho_nlcc(:, 1), rho0_h, rho0_s, rho_nlcc(:, 2), drho0_h, drho0_s)
                  END IF
               END DO
               IF (tau_f) THEN
                  !compute tau on the grid all at once
                  CALL calc_tau_atom(tau0_h, tau0_s, rho0_atom, kind_set(ikind), nspins)
               ELSE
                  tau_d = 0.0_dp
               END IF
               ! RHO1
               rho1_h = 0.0_dp
               rho1_s = 0.0_dp
               rho1_atom => rho1_atom_set(iatom)
               IF (gradient_f) THEN
                  NULLIFY (r_h, r_s, dr_h, dr_s, r_h_d, r_s_d)
                  CALL get_rho_atom(rho_atom=rho1_atom, rho_rad_h=r_h, rho_rad_s=r_s, drho_rad_h=dr_h, &
                                    drho_rad_s=dr_s, rho_rad_h_d=r_h_d, rho_rad_s_d=r_s_d)
                  drho1_h = 0.0_dp
                  drho1_s = 0.0_dp
               ELSE
                  NULLIFY (r_h, r_s)
                  CALL get_rho_atom(rho_atom=rho1_atom, rho_rad_h=r_h, rho_rad_s=r_s)
               END IF
               DO ir = 1, nr
                  CALL calc_rho_angular(grid_atom, harmonics, nspins, gradient_f, &
                                        ir, r_h, r_s, rho1_h, rho1_s, dr_h, dr_s, &
                                        r_h_d, r_s_d, drho1_h, drho1_s)
               END DO
               IF (tau_f) THEN
                  !compute tau on the grid all at once
                  CALL calc_tau_atom(tau1_h, tau1_s, rho1_atom, kind_set(ikind), nspins)
               END IF
               ! RHO2
               rho2_atom => rho2_atom_set(iatom)

               DO istep = -nstep, nstep

                  beta = REAL(istep, KIND=dp)*epsrho

                  IF (is_triplet) THEN
                     rho_h(:, :, 1) = rho0_h(:, :, 1) + beta*rho1_h(:, :, 1)
                     rho_h(:, :, 2) = rho0_h(:, :, 1)
                     rho_h = 0.5_dp*rho_h
                     rho_s(:, :, 1) = rho0_s(:, :, 1) + beta*rho1_s(:, :, 1)
                     rho_s(:, :, 2) = rho0_s(:, :, 1)
                     rho_s = 0.5_dp*rho_s
                     IF (gradient_f) THEN
                        drho_h(:, :, :, 1) = drho0_h(:, :, :, 1) + beta*drho1_h(:, :, :, 1)
                        drho_h(:, :, :, 2) = drho0_h(:, :, :, 1)
                        drho_h = 0.5_dp*drho_h
                        drho_s(:, :, :, 1) = drho0_s(:, :, :, 1) + beta*drho1_s(:, :, :, 1)
                        drho_s(:, :, :, 2) = drho0_s(:, :, :, 1)
                        drho_s = 0.5_dp*drho_s
                     END IF
                     IF (tau_f) THEN
                        tau_h(:, :, 1) = tau0_h(:, :, 1) + beta*tau1_h(:, :, 1)
                        tau_h(:, :, 2) = tau0_h(:, :, 1)
                        tau_h = 0.5_dp*tau0_h
                        tau_s(:, :, 1) = tau0_s(:, :, 1) + beta*tau1_s(:, :, 1)
                        tau_s(:, :, 2) = tau0_s(:, :, 1)
                        tau_s = 0.5_dp*tau0_s
                     END IF
                  ELSE
                     rho_h = rho0_h + beta*rho1_h
                     rho_s = rho0_s + beta*rho1_s
                     IF (gradient_f) THEN
                        drho_h = drho0_h + beta*drho1_h
                        drho_s = drho0_s + beta*drho1_s
                     END IF
                     IF (tau_f) THEN
                        tau_h = tau0_h + beta*tau1_h
                        tau_s = tau0_s + beta*tau1_s
                     END IF
                  END IF
                  !
                  IF (gradient_f) THEN
                     drho_h(4, :, :, :) = SQRT( &
                                          drho_h(1, :, :, :)*drho_h(1, :, :, :) + &
                                          drho_h(2, :, :, :)*drho_h(2, :, :, :) + &
                                          drho_h(3, :, :, :)*drho_h(3, :, :, :))

                     drho_s(4, :, :, :) = SQRT( &
                                          drho_s(1, :, :, :)*drho_s(1, :, :, :) + &
                                          drho_s(2, :, :, :)*drho_s(2, :, :, :) + &
                                          drho_s(3, :, :, :)*drho_s(3, :, :, :))
                  END IF

                  DO ir = 1, nr
                     IF (tau_f) THEN
                        CALL fill_rho_set(rho_set_h, lsd, mspins, needs, rho_h, drho_h, tau_h, na, ir)
                        CALL fill_rho_set(rho_set_s, lsd, mspins, needs, rho_s, drho_s, tau_s, na, ir)
                     ELSE IF (gradient_f) THEN
                        CALL fill_rho_set(rho_set_h, lsd, mspins, needs, rho_h, drho_h, tau_d, na, ir)
                        CALL fill_rho_set(rho_set_s, lsd, mspins, needs, rho_s, drho_s, tau_d, na, ir)
                     ELSE
                        CALL fill_rho_set(rho_set_h, lsd, mspins, needs, rho_h, rho_d, tau_d, na, ir)
                        CALL fill_rho_set(rho_set_s, lsd, mspins, needs, rho_s, rho_d, tau_d, na, ir)
                     END IF
                  END DO

                  ! hard atom density !
                  CALL xc_dset_zero_all(deriv_set)
                  CALL vxc_of_r_new(xc_fun_section, rho_set_h, deriv_set, 1, needs, weight, &
                                    lsd, na, nr, exc_h, vxc_h, vxg_h, vtau_h)
                  IF (is_triplet) THEN
                     vxc_h(:, :, 1) = vxc_h(:, :, 1) - vxc_h(:, :, 2)
                     IF (gradient_f) THEN
                        vxg_h(:, :, :, 1) = vxg_h(:, :, :, 1) - vxg_h(:, :, :, 2)
                     END IF
                     IF (tau_f) THEN
                        vtau_h(:, :, 1) = vtau_h(:, :, 1) - vtau_h(:, :, 2)
                     END IF
                  END IF
                  ! soft atom density !
                  CALL xc_dset_zero_all(deriv_set)
                  CALL vxc_of_r_new(xc_fun_section, rho_set_s, deriv_set, 1, needs, weight, &
                                    lsd, na, nr, exc_s, vxc_s, vxg_s, vtau_s)
                  IF (is_triplet) THEN
                     vxc_s(:, :, 1) = vxc_s(:, :, 1) - vxc_s(:, :, 2)
                     IF (gradient_f) THEN
                        vxg_s(:, :, :, 1) = vxg_s(:, :, :, 1) - vxg_s(:, :, :, 2)
                     END IF
                     IF (tau_f) THEN
                        vtau_s(:, :, 1) = vtau_s(:, :, 1) - vtau_s(:, :, 2)
                     END IF
                  END IF
                  ! potentials
                  DO ns = 1, nspins
                     fint_hh(ns)%r_coef(:, :) = 0.0_dp
                     fint_ss(ns)%r_coef(:, :) = 0.0_dp
                  END DO
                  IF (gradient_f) THEN
                     CALL gaVxcgb_GC(vxc_h, vxc_s, vxg_h, vxg_s, fint_hh, fint_ss, &
                                     grid_atom, basis_1c, harmonics, nspins)
                  ELSE
                     CALL gaVxcgb_noGC(vxc_h, vxc_s, fint_hh, fint_ss, &
                                       grid_atom, basis_1c, harmonics, nspins)
                  END IF
                  IF (tau_f) THEN
                     CALL dgaVtaudgb(vtau_h, vtau_s, fint_hh, fint_ss, &
                                     grid_atom, basis_1c, harmonics, nspins)
                  END IF
                  ! first derivative fxc
                  NULLIFY (int_hh, int_ss)
                  CALL get_rho_atom(rho_atom=rho1_atom, ga_Vlocal_gb_h=int_hh, ga_Vlocal_gb_s=int_ss)
                  DO ns = 1, nspins
                     int_ss(ns)%r_coef(:, :) = int_ss(ns)%r_coef(:, :) + oeps1*ak(istep)*fint_ss(ns)%r_coef(:, :)
                     int_hh(ns)%r_coef(:, :) = int_hh(ns)%r_coef(:, :) + oeps1*ak(istep)*fint_hh(ns)%r_coef(:, :)
                  END DO
                  ! second derivative gxc
                  NULLIFY (int_hh, int_ss)
                  CALL get_rho_atom(rho_atom=rho2_atom, ga_Vlocal_gb_h=int_hh, ga_Vlocal_gb_s=int_ss)
                  DO ns = 1, nspins
                     int_ss(ns)%r_coef(:, :) = int_ss(ns)%r_coef(:, :) + oeps2*bl(istep)*fint_ss(ns)%r_coef(:, :)
                     int_hh(ns)%r_coef(:, :) = int_hh(ns)%r_coef(:, :) + oeps2*bl(istep)*fint_hh(ns)%r_coef(:, :)
                  END DO
               END DO
               !
               DO ns = 1, nspins
                  DEALLOCATE (fint_ss(ns)%r_coef)
                  DEALLOCATE (fint_hh(ns)%r_coef)
               END DO
               DEALLOCATE (fint_ss, fint_hh)

            END DO ! iat

            ! Release the xc structure used to store the xc derivatives
            CALL xc_dset_release(deriv_set)
            CALL xc_rho_set_release(rho_set_h)
            CALL xc_rho_set_release(rho_set_s)

            DEALLOCATE (rho_h, rho_s, rho0_h, rho0_s, rho1_h, rho1_s)
            DEALLOCATE (vxc_h, vxc_s)
            IF (gradient_f) THEN
               DEALLOCATE (drho_h, drho_s, drho0_h, drho0_s, drho1_h, drho1_s)
               DEALLOCATE (vxg_h, vxg_s)
            END IF
            IF (tau_f) THEN
               DEALLOCATE (tau_h, tau_s, tau0_h, tau0_s, tau1_h, tau1_s)
               DEALLOCATE (vtau_h, vtau_s)
            END IF

         END DO ! ikind

      END IF !xc_none

      CALL timestop(handle)

   END SUBROUTINE calculate_gfxc_atom

! **************************************************************************************************
!> \brief ...
!> \param qs_env ...
!> \param rho0_atom_set ...
!> \param rho1_atom_set ...
!> \param rho2_atom_set ...
!> \param kind_set ...
!> \param xc_section ...
!> \param is_triplet ...
!> \param accuracy ...
!> \param epsrho ...
! **************************************************************************************************
   SUBROUTINE gfxc_atom_diff(qs_env, rho0_atom_set, rho1_atom_set, rho2_atom_set, &
                             kind_set, xc_section, is_triplet, accuracy, epsrho)

      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(rho_atom_type), DIMENSION(:), POINTER         :: rho0_atom_set, rho1_atom_set, &
                                                            rho2_atom_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: kind_set
      TYPE(section_vals_type), OPTIONAL, POINTER         :: xc_section
      LOGICAL, INTENT(IN)                                :: is_triplet
      INTEGER, INTENT(IN)                                :: accuracy
      REAL(KIND=dp), INTENT(IN)                          :: epsrho

      CHARACTER(LEN=*), PARAMETER                        :: routineN = 'gfxc_atom_diff'

      INTEGER                                            :: bo(2), handle, iat, iatom, ikind, ir, &
                                                            istep, mspins, myfun, na, natom, nf, &
                                                            nr, ns, nspins, nstep, num_pe
      INTEGER, DIMENSION(2, 3)                           :: bounds
      INTEGER, DIMENSION(:), POINTER                     :: atom_list
      LOGICAL                                            :: donlcc, gradient_f, lsd, nlcc, paw_atom, &
                                                            tau_f
      REAL(dp)                                           :: beta, density_cut, gradient_cut, oeps1, &
                                                            tau_cut
      REAL(dp), CONTIGUOUS, DIMENSION(:, :, :), POINTER  :: vxc_h, vxc_s
      REAL(dp), DIMENSION(1, 1, 1)                       :: tau_d
      REAL(dp), DIMENSION(1, 1, 1, 1)                    :: rho_d
      REAL(dp), DIMENSION(:, :), POINTER                 :: rho_nlcc, weight
      REAL(dp), DIMENSION(:, :, :), POINTER              :: rho0_h, rho0_s, rho1_h, rho1_s, rho_h, &
                                                            rho_s, tau0_h, tau0_s, tau1_h, tau1_s, &
                                                            tau_h, tau_s, vtau_h, vtau_s
      REAL(dp), DIMENSION(:, :, :, :), POINTER           :: drho0_h, drho0_s, drho1_h, drho1_s, &
                                                            drho_h, drho_s, vxg_h, vxg_s
      REAL(KIND=dp), DIMENSION(-4:4)                     :: ak
      TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: dr_h, dr_s, fint_hh, fint_ss, int_hh, &
                                                            int_ss, r_h, r_s
      TYPE(rho_atom_coeff), DIMENSION(:, :), POINTER     :: r_h_d, r_s_d
      TYPE(rho_atom_type), POINTER                       :: rho0_atom, rho1_atom, rho2_atom
      TYPE(section_vals_type), POINTER                   :: xc_fun_section
      TYPE(xc_derivative_set_type)                       :: deriv_set
      TYPE(xc_rho_cflags_type)                           :: needs
      TYPE(xc_rho_set_type)                              :: rho1_set_h, rho1_set_s, rho_set_h, &
                                                            rho_set_s

      CALL timeset(routineN, handle)

      ak = 0.0_dp
      SELECT CASE (accuracy)
      CASE (:4)
         nstep = 2
         ak(-2:2) = (/1.0_dp, -8.0_dp, 0.0_dp, 8.0_dp, -1.0_dp/)/12.0_dp
      CASE (5:7)
         nstep = 3
         ak(-3:3) = (/-1.0_dp, 9.0_dp, -45.0_dp, 0.0_dp, 45.0_dp, -9.0_dp, 1.0_dp/)/60.0_dp
      CASE (8:)
         nstep = 4
         ak(-4:4) = (/1.0_dp, -32.0_dp/3.0_dp, 56.0_dp, -224.0_dp, 0.0_dp, &
                      224.0_dp, -56.0_dp, 32.0_dp/3.0_dp, -1.0_dp/)/280.0_dp
      END SELECT
      oeps1 = 1.0_dp/epsrho

      CALL get_qs_env(qs_env=qs_env, &
                      dft_control=dft_control, &
                      para_env=para_env, &
                      atomic_kind_set=atomic_kind_set)

      xc_fun_section => section_vals_get_subs_vals(xc_section, "XC_FUNCTIONAL")
      CALL section_vals_val_get(xc_fun_section, "_SECTION_PARAMETERS_", i_val=myfun)

      IF (myfun == xc_none) THEN
         ! no action needed?
      ELSE
         ! calculate fxc
         CALL calculate_xc_2nd_deriv_atom(rho0_atom_set, rho1_atom_set, qs_env, xc_section, para_env, &
                                          do_triplet=is_triplet, kind_set_external=kind_set)

         CALL section_vals_val_get(xc_section, "DENSITY_CUTOFF", r_val=density_cut)
         CALL section_vals_val_get(xc_section, "GRADIENT_CUTOFF", r_val=gradient_cut)
         CALL section_vals_val_get(xc_section, "TAU_CUTOFF", r_val=tau_cut)

         nlcc = has_nlcc(kind_set)
         lsd = dft_control%lsd
         nspins = dft_control%nspins
         mspins = nspins
         IF (is_triplet) THEN
            CPASSERT(nspins == 1)
            lsd = .TRUE.
            mspins = 2
         END IF
         needs = xc_functionals_get_needs(xc_fun_section, lsd=lsd, calc_potential=.TRUE.)
         gradient_f = (needs%drho .OR. needs%drho_spin)
         tau_f = (needs%tau .OR. needs%tau_spin)

         ! Here starts the loop over all the atoms
         DO ikind = 1, SIZE(atomic_kind_set)
            CALL get_atomic_kind(atomic_kind_set(ikind), atom_list=atom_list, natom=natom)
            CALL get_qs_kind(kind_set(ikind), paw_atom=paw_atom, &
                             harmonics=harmonics, grid_atom=grid_atom)
            CALL get_qs_kind(kind_set(ikind), basis_set=basis_1c, basis_type="GAPW_1C")

            IF (.NOT. paw_atom) CYCLE

            nr = grid_atom%nr
            na = grid_atom%ng_sphere

            ! Prepare the structures needed to calculate and store the xc derivatives

            ! Array dimension: here anly one dimensional arrays are used,
            ! i.e. only the first column of deriv_data is read.
            ! The other to dimensions  are set to size equal 1
            bounds(1:2, 1:3) = 1
            bounds(2, 1) = na
            bounds(2, 2) = nr

            ! create a place where to put the derivatives
            CALL xc_dset_create(deriv_set, local_bounds=bounds)
            ! create the place where to store the argument for the functionals
            CALL xc_rho_set_create(rho_set_h, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
            CALL xc_rho_set_create(rho_set_s, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
            CALL xc_rho_set_create(rho1_set_h, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)
            CALL xc_rho_set_create(rho1_set_s, bounds, rho_cutoff=density_cut, &
                                   drho_cutoff=gradient_cut, tau_cutoff=tau_cut)

            ! allocate the required 3d arrays where to store rho and drho
            CALL xc_rho_set_atom_update(rho_set_h, needs, mspins, bounds)
            CALL xc_rho_set_atom_update(rho_set_s, needs, mspins, bounds)
            CALL xc_rho_set_atom_update(rho1_set_h, needs, mspins, bounds)
            CALL xc_rho_set_atom_update(rho1_set_s, needs, mspins, bounds)

            weight => grid_atom%weight

            ALLOCATE (rho_h(na, nr, nspins), rho_s(na, nr, nspins), &
                      rho0_h(na, nr, nspins), rho0_s(na, nr, nspins), &
                      rho1_h(na, nr, nspins), rho1_s(na, nr, nspins))
            ALLOCATE (vxc_h(na, nr, nspins), vxc_s(na, nr, nspins))
            IF (gradient_f) THEN
               ALLOCATE (drho_h(4, na, nr, nspins), drho_s(4, na, nr, nspins), &
                         drho0_h(4, na, nr, nspins), drho0_s(4, na, nr, nspins), &
                         drho1_h(4, na, nr, nspins), drho1_s(4, na, nr, nspins))
               ALLOCATE (vxg_h(3, na, nr, nspins), vxg_s(3, na, nr, nspins))
            END IF
            IF (tau_f) THEN
               ALLOCATE (tau_h(na, nr, nspins), tau_s(na, nr, nspins), &
                         tau0_h(na, nr, nspins), tau0_s(na, nr, nspins), &
                         tau1_h(na, nr, nspins), tau1_s(na, nr, nspins))
               ALLOCATE (vtau_h(na, nr, nspins), vtau_s(na, nr, nspins))
            END IF
            !
            ! NLCC: prepare rho and drho of the core charge for this KIND
            donlcc = .FALSE.
            IF (nlcc) THEN
               NULLIFY (rho_nlcc)
               rho_nlcc => kind_set(ikind)%nlcc_pot
               IF (ASSOCIATED(rho_nlcc)) donlcc = .TRUE.
            END IF

            ! Distribute the atoms of this kind
            num_pe = para_env%num_pe
            bo = get_limit(natom, num_pe, para_env%mepos)

            DO iat = bo(1), bo(2)
               iatom = atom_list(iat)
               !
               NULLIFY (int_hh, int_ss)
               rho0_atom => rho0_atom_set(iatom)
               CALL get_rho_atom(rho_atom=rho0_atom, ga_Vlocal_gb_h=int_hh, ga_Vlocal_gb_s=int_ss)
               ALLOCATE (fint_ss(nspins), fint_hh(nspins))
               DO ns = 1, nspins
                  nf = SIZE(int_ss(ns)%r_coef, 1)
                  ALLOCATE (fint_ss(ns)%r_coef(nf, nf))
                  nf = SIZE(int_hh(ns)%r_coef, 1)
                  ALLOCATE (fint_hh(ns)%r_coef(nf, nf))
               END DO

               ! RHO0
               rho0_h = 0.0_dp
               rho0_s = 0.0_dp
               rho0_atom => rho0_atom_set(iatom)
               IF (gradient_f) THEN
                  NULLIFY (r_h, r_s, dr_h, dr_s, r_h_d, r_s_d)
                  CALL get_rho_atom(rho_atom=rho0_atom, rho_rad_h=r_h, rho_rad_s=r_s, drho_rad_h=dr_h, &
                                    drho_rad_s=dr_s, rho_rad_h_d=r_h_d, rho_rad_s_d=r_s_d)
                  drho0_h = 0.0_dp
                  drho0_s = 0.0_dp
               ELSE
                  NULLIFY (r_h, r_s)
                  CALL get_rho_atom(rho_atom=rho0_atom, rho_rad_h=r_h, rho_rad_s=r_s)
                  rho_d = 0.0_dp
               END IF
               DO ir = 1, nr
                  CALL calc_rho_angular(grid_atom, harmonics, nspins, gradient_f, &
                                        ir, r_h, r_s, rho0_h, rho0_s, dr_h, dr_s, &
                                        r_h_d, r_s_d, drho0_h, drho0_s)
                  IF (donlcc) THEN
                     CALL calc_rho_nlcc(grid_atom, nspins, gradient_f, &
                                        ir, rho_nlcc(:, 1), rho0_h, rho0_s, rho_nlcc(:, 2), drho0_h, drho0_s)
                  END IF
               END DO
               IF (tau_f) THEN
                  !compute tau on the grid all at once
                  CALL calc_tau_atom(tau0_h, tau0_s, rho0_atom, kind_set(ikind), nspins)
               ELSE
                  tau_d = 0.0_dp
               END IF
               ! RHO1
               rho1_h = 0.0_dp
               rho1_s = 0.0_dp
               rho1_atom => rho1_atom_set(iatom)
               IF (gradient_f) THEN
                  NULLIFY (r_h, r_s, dr_h, dr_s, r_h_d, r_s_d)
                  CALL get_rho_atom(rho_atom=rho1_atom, rho_rad_h=r_h, rho_rad_s=r_s, drho_rad_h=dr_h, &
                                    drho_rad_s=dr_s, rho_rad_h_d=r_h_d, rho_rad_s_d=r_s_d)
                  drho1_h = 0.0_dp
                  drho1_s = 0.0_dp
               ELSE
                  NULLIFY (r_h, r_s)
                  CALL get_rho_atom(rho_atom=rho1_atom, rho_rad_h=r_h, rho_rad_s=r_s)
               END IF
               DO ir = 1, nr
                  CALL calc_rho_angular(grid_atom, harmonics, nspins, gradient_f, &
                                        ir, r_h, r_s, rho1_h, rho1_s, dr_h, dr_s, &
                                        r_h_d, r_s_d, drho1_h, drho1_s)
               END DO
               IF (tau_f) THEN
                  !compute tau on the grid all at once
                  CALL calc_tau_atom(tau1_h, tau1_s, rho1_atom, kind_set(ikind), nspins)
               END IF

               DO ir = 1, nr
                  IF (tau_f) THEN
                     CALL fill_rho_set(rho1_set_h, lsd, nspins, needs, rho1_h, drho1_h, tau1_h, na, ir)
                     CALL fill_rho_set(rho1_set_s, lsd, nspins, needs, rho1_s, drho1_s, tau1_s, na, ir)
                  ELSE IF (gradient_f) THEN
                     CALL fill_rho_set(rho1_set_h, lsd, nspins, needs, rho1_h, drho1_h, tau_d, na, ir)
                     CALL fill_rho_set(rho1_set_s, lsd, nspins, needs, rho1_s, drho1_s, tau_d, na, ir)
                  ELSE
                     CALL fill_rho_set(rho1_set_h, lsd, nspins, needs, rho1_h, rho_d, tau_d, na, ir)
                     CALL fill_rho_set(rho1_set_s, lsd, nspins, needs, rho1_s, rho_d, tau_d, na, ir)
                  END IF
               END DO

               ! RHO2
               rho2_atom => rho2_atom_set(iatom)

               DO istep = -nstep, nstep

                  beta = REAL(istep, KIND=dp)*epsrho

                  rho_h = rho0_h + beta*rho1_h
                  rho_s = rho0_s + beta*rho1_s
                  IF (gradient_f) THEN
                     drho_h = drho0_h + beta*drho1_h
                     drho_s = drho0_s + beta*drho1_s
                  END IF
                  IF (tau_f) THEN
                     tau_h = tau0_h + beta*tau1_h
                     tau_s = tau0_s + beta*tau1_s
                  END IF
                  !
                  IF (gradient_f) THEN
                     drho_h(4, :, :, :) = SQRT( &
                                          drho_h(1, :, :, :)*drho_h(1, :, :, :) + &
                                          drho_h(2, :, :, :)*drho_h(2, :, :, :) + &
                                          drho_h(3, :, :, :)*drho_h(3, :, :, :))

                     drho_s(4, :, :, :) = SQRT( &
                                          drho_s(1, :, :, :)*drho_s(1, :, :, :) + &
                                          drho_s(2, :, :, :)*drho_s(2, :, :, :) + &
                                          drho_s(3, :, :, :)*drho_s(3, :, :, :))
                  END IF

                  DO ir = 1, nr
                     IF (tau_f) THEN
                        CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, drho_h, tau_h, na, ir)
                        CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, drho_s, tau_s, na, ir)
                     ELSE IF (gradient_f) THEN
                        CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, drho_h, tau_d, na, ir)
                        CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, drho_s, tau_d, na, ir)
                     ELSE
                        CALL fill_rho_set(rho_set_h, lsd, nspins, needs, rho_h, rho_d, tau_d, na, ir)
                        CALL fill_rho_set(rho_set_s, lsd, nspins, needs, rho_s, rho_d, tau_d, na, ir)
                     END IF
                  END DO

                  ! hard atom density !
                  CALL xc_dset_zero_all(deriv_set)
                  CALL xc_2nd_deriv_of_r(xc_section=xc_section, &
                                         rho_set=rho_set_h, rho1_set=rho1_set_h, &
                                         deriv_set=deriv_set, &
                                         w=weight, vxc=vxc_h, vxg=vxg_h, do_triplet=is_triplet)
                  ! soft atom density !
                  CALL xc_dset_zero_all(deriv_set)
                  CALL xc_2nd_deriv_of_r(xc_section=xc_section, &
                                         rho_set=rho_set_s, rho1_set=rho1_set_s, &
                                         deriv_set=deriv_set, &
                                         w=weight, vxc=vxc_s, vxg=vxg_s, do_triplet=is_triplet)
                  ! potentials
                  DO ns = 1, nspins
                     fint_hh(ns)%r_coef(:, :) = 0.0_dp
                     fint_ss(ns)%r_coef(:, :) = 0.0_dp
                  END DO
                  IF (gradient_f) THEN
                     CALL gaVxcgb_GC(vxc_h, vxc_s, vxg_h, vxg_s, fint_hh, fint_ss, &
                                     grid_atom, basis_1c, harmonics, nspins)
                  ELSE
                     CALL gaVxcgb_noGC(vxc_h, vxc_s, fint_hh, fint_ss, &
                                       grid_atom, basis_1c, harmonics, nspins)
                  END IF
                  IF (tau_f) THEN
                     CALL dgaVtaudgb(vtau_h, vtau_s, fint_hh, fint_ss, &
                                     grid_atom, basis_1c, harmonics, nspins)
                  END IF
                  ! second derivative gxc
                  NULLIFY (int_hh, int_ss)
                  CALL get_rho_atom(rho_atom=rho2_atom, ga_Vlocal_gb_h=int_hh, ga_Vlocal_gb_s=int_ss)
                  DO ns = 1, nspins
                     int_ss(ns)%r_coef(:, :) = int_ss(ns)%r_coef(:, :) + oeps1*ak(istep)*fint_ss(ns)%r_coef(:, :)
                     int_hh(ns)%r_coef(:, :) = int_hh(ns)%r_coef(:, :) + oeps1*ak(istep)*fint_hh(ns)%r_coef(:, :)
                  END DO
               END DO
               !
               DO ns = 1, nspins
                  DEALLOCATE (fint_ss(ns)%r_coef)
                  DEALLOCATE (fint_hh(ns)%r_coef)
               END DO
               DEALLOCATE (fint_ss, fint_hh)

            END DO ! iat

            ! Release the xc structure used to store the xc derivatives
            CALL xc_dset_release(deriv_set)
            CALL xc_rho_set_release(rho_set_h)
            CALL xc_rho_set_release(rho_set_s)
            CALL xc_rho_set_release(rho1_set_h)
            CALL xc_rho_set_release(rho1_set_s)

            DEALLOCATE (rho_h, rho_s, rho0_h, rho0_s, rho1_h, rho1_s)
            DEALLOCATE (vxc_h, vxc_s)
            IF (gradient_f) THEN
               DEALLOCATE (drho_h, drho_s, drho0_h, drho0_s, drho1_h, drho1_s)
               DEALLOCATE (vxg_h, vxg_s)
            END IF
            IF (tau_f) THEN
               DEALLOCATE (tau_h, tau_s, tau0_h, tau0_s, tau1_h, tau1_s)
               DEALLOCATE (vtau_h, vtau_s)
            END IF

         END DO ! ikind

      END IF !xc_none

      CALL timestop(handle)

   END SUBROUTINE gfxc_atom_diff

! **************************************************************************************************
!> \brief ...
!> \param grid_atom ...
!> \param harmonics ...
!> \param nspins ...
!> \param grad_func ...
!> \param ir ...
!> \param r_h ...
!> \param r_s ...
!> \param rho_h ...
!> \param rho_s ...
!> \param dr_h ...
!> \param dr_s ...
!> \param r_h_d ...
!> \param r_s_d ...
!> \param drho_h ...
!> \param drho_s ...
! **************************************************************************************************
   SUBROUTINE calc_rho_angular(grid_atom, harmonics, nspins, grad_func, &
                               ir, r_h, r_s, rho_h, rho_s, &
                               dr_h, dr_s, r_h_d, r_s_d, drho_h, drho_s)

      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      INTEGER, INTENT(IN)                                :: nspins
      LOGICAL, INTENT(IN)                                :: grad_func
      INTEGER, INTENT(IN)                                :: ir
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: r_h, r_s
      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: rho_h, rho_s
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: dr_h, dr_s
      TYPE(rho_atom_coeff), DIMENSION(:, :), POINTER     :: r_h_d, r_s_d
      REAL(KIND=dp), DIMENSION(:, :, :, :), POINTER      :: drho_h, drho_s

      INTEGER                                            :: ia, iso, ispin, na
      REAL(KIND=dp)                                      :: rad, urad

      CPASSERT(ASSOCIATED(r_h))
      CPASSERT(ASSOCIATED(r_s))
      CPASSERT(ASSOCIATED(rho_h))
      CPASSERT(ASSOCIATED(rho_s))
      IF (grad_func) THEN
         CPASSERT(ASSOCIATED(dr_h))
         CPASSERT(ASSOCIATED(dr_s))
         CPASSERT(ASSOCIATED(r_h_d))
         CPASSERT(ASSOCIATED(r_s_d))
         CPASSERT(ASSOCIATED(drho_h))
         CPASSERT(ASSOCIATED(drho_s))
      END IF

      na = grid_atom%ng_sphere
      rad = grid_atom%rad(ir)
      urad = grid_atom%oorad2l(ir, 1)
      DO ispin = 1, nspins
         DO iso = 1, harmonics%max_iso_not0
            DO ia = 1, na
               rho_h(ia, ir, ispin) = rho_h(ia, ir, ispin) + &
                                      r_h(ispin)%r_coef(ir, iso)*harmonics%slm(ia, iso)
               rho_s(ia, ir, ispin) = rho_s(ia, ir, ispin) + &
                                      r_s(ispin)%r_coef(ir, iso)*harmonics%slm(ia, iso)
            END DO ! ia
         END DO ! iso
      END DO ! ispin

      IF (grad_func) THEN
         DO ispin = 1, nspins
            DO iso = 1, harmonics%max_iso_not0
               DO ia = 1, na

                  ! components of the gradient of rho1 hard
                  drho_h(1, ia, ir, ispin) = drho_h(1, ia, ir, ispin) + &
                                             dr_h(ispin)%r_coef(ir, iso)* &
                                             harmonics%a(1, ia)*harmonics%slm(ia, iso) + &
                                             r_h_d(1, ispin)%r_coef(ir, iso)* &
                                             harmonics%slm(ia, iso)

                  drho_h(2, ia, ir, ispin) = drho_h(2, ia, ir, ispin) + &
                                             dr_h(ispin)%r_coef(ir, iso)* &
                                             harmonics%a(2, ia)*harmonics%slm(ia, iso) + &
                                             r_h_d(2, ispin)%r_coef(ir, iso)* &
                                             harmonics%slm(ia, iso)

                  drho_h(3, ia, ir, ispin) = drho_h(3, ia, ir, ispin) + &
                                             dr_h(ispin)%r_coef(ir, iso)* &
                                             harmonics%a(3, ia)*harmonics%slm(ia, iso) + &
                                             r_h_d(3, ispin)%r_coef(ir, iso)* &
                                             harmonics%slm(ia, iso)

                  ! components of the gradient of rho1 soft
                  drho_s(1, ia, ir, ispin) = drho_s(1, ia, ir, ispin) + &
                                             dr_s(ispin)%r_coef(ir, iso)* &
                                             harmonics%a(1, ia)*harmonics%slm(ia, iso) + &
                                             r_s_d(1, ispin)%r_coef(ir, iso)* &
                                             harmonics%slm(ia, iso)

                  drho_s(2, ia, ir, ispin) = drho_s(2, ia, ir, ispin) + &
                                             dr_s(ispin)%r_coef(ir, iso)* &
                                             harmonics%a(2, ia)*harmonics%slm(ia, iso) + &
                                             r_s_d(2, ispin)%r_coef(ir, iso)* &
                                             harmonics%slm(ia, iso)

                  drho_s(3, ia, ir, ispin) = drho_s(3, ia, ir, ispin) + &
                                             dr_s(ispin)%r_coef(ir, iso)* &
                                             harmonics%a(3, ia)*harmonics%slm(ia, iso) + &
                                             r_s_d(3, ispin)%r_coef(ir, iso)* &
                                             harmonics%slm(ia, iso)

                  drho_h(4, ia, ir, ispin) = SQRT( &
                                             drho_h(1, ia, ir, ispin)*drho_h(1, ia, ir, ispin) + &
                                             drho_h(2, ia, ir, ispin)*drho_h(2, ia, ir, ispin) + &
                                             drho_h(3, ia, ir, ispin)*drho_h(3, ia, ir, ispin))

                  drho_s(4, ia, ir, ispin) = SQRT( &
                                             drho_s(1, ia, ir, ispin)*drho_s(1, ia, ir, ispin) + &
                                             drho_s(2, ia, ir, ispin)*drho_s(2, ia, ir, ispin) + &
                                             drho_s(3, ia, ir, ispin)*drho_s(3, ia, ir, ispin))

               END DO ! ia
            END DO ! iso
         END DO ! ispin
      END IF

   END SUBROUTINE calc_rho_angular

! **************************************************************************************************
!> \brief Computes tau hard and soft on the atomic grids for meta-GGA calculations
!> \param tau_h the hard part of tau
!> \param tau_s the soft part of tau
!> \param rho_atom ...
!> \param qs_kind ...
!> \param nspins ...
!> \note This is a rewrite to correct a meta-GGA GAPW bug. This is more brute force than the original,
!>       which was done along in qs_rho_atom_methods.F, but makes sure that no corner is cut in
!>       terms of accuracy (A. Bussy)
! **************************************************************************************************
   SUBROUTINE calc_tau_atom(tau_h, tau_s, rho_atom, qs_kind, nspins)

      REAL(dp), DIMENSION(:, :, :), INTENT(INOUT)        :: tau_h, tau_s
      TYPE(rho_atom_type), POINTER                       :: rho_atom
      TYPE(qs_kind_type), INTENT(IN)                     :: qs_kind
      INTEGER, INTENT(IN)                                :: nspins

      CHARACTER(len=*), PARAMETER                        :: routineN = 'calc_tau_atom'

      INTEGER                                            :: dir, handle, ia, ip, ipgf, ir, iset, &
                                                            iso, ispin, jp, jpgf, jset, jso, l, &
                                                            maxso, na, nr, nset, starti, startj
      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, o2nindex
      REAL(dp)                                           :: cpc_h, cpc_s
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fga, fgr, r1, r2
      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: a1, a2
      REAL(dp), DIMENSION(:, :), POINTER                 :: slm, zet
      REAL(dp), DIMENSION(:, :, :), POINTER              :: dslm_dxyz
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics

      NULLIFY (harmonics, grid_atom, basis_1c, lmax, lmin, npgf, zet, slm, dslm_dxyz, o2nindex)

      CALL timeset(routineN, handle)

      !We need to put 0.5* grad_g1 dot grad_gw on the grid
      !For this we need grid info, basis info, and projector info
      CALL get_qs_kind(qs_kind, grid_atom=grid_atom, harmonics=harmonics)
      CALL get_qs_kind(qs_kind, basis_set=basis_1c, basis_type="GAPW_1C")

      nr = grid_atom%nr
      na = grid_atom%ng_sphere

      slm => harmonics%slm
      dslm_dxyz => harmonics%dslm_dxyz

      CALL get_paw_basis_info(basis_1c, o2nindex=o2nindex)

      !zeroing tau, assuming it is already allocated
      tau_h = 0.0_dp
      tau_s = 0.0_dp

      CALL get_gto_basis_set(gto_basis_set=basis_1c, lmax=lmax, lmin=lmin, npgf=npgf, &
                             nset=nset, zet=zet, maxso=maxso)

      !Separate the functions into purely r and purely angular parts, precompute them all
      ALLOCATE (a1(na, nset*maxso, 3), a2(na, nset*maxso, 3))
      ALLOCATE (r1(nr, nset*maxso), r2(nr, nset*maxso))
      a1 = 0.0_dp; a2 = 0.0_dp
      r1 = 0.0_dp; r2 = 0.0_dp

      DO iset = 1, nset
         DO ipgf = 1, npgf(iset)
            starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
            DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
               l = indso(1, iso)

               ! The x derivative of the spherical orbital, divided in angular and radial parts
               ! Two of each are needed because d/dx(r^l Y_lm) * exp(-al*r^2) + r^l Y_lm * ! d/dx(exp-al*r^2)

               ! the purely radial part of d/dx(r^l Y_lm) * exp(-al*r^2) (same for y, z)
               r1(1:nr, starti + iso) = grid_atom%rad(1:nr)**(l - 1)*EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))

               ! the purely radial part of r^l Y_lm * d/dx(exp-al*r^2) (same for y, z)
               r2(1:nr, starti + iso) = -2.0_dp*zet(ipgf, iset)*grid_atom%rad(1:nr)**(l + 1) &
                                        *EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))

               DO dir = 1, 3
                  ! the purely angular part of d/dx(r^l Y_lm) * exp(-al*r^2)
                  a1(1:na, starti + iso, dir) = dslm_dxyz(dir, 1:na, iso)

                  ! the purely angular part of r^l Y_lm * d/dx(exp-al*r^2)
                  a2(1:na, starti + iso, dir) = harmonics%a(dir, 1:na)*slm(1:na, iso)
               END DO

            END DO !iso
         END DO !ipgf
      END DO !iset

      !Compute the matrix products
      ALLOCATE (fga(na, 1))
      ALLOCATE (fgr(nr, 1))
      fga = 0.0_dp; fgr = 0.0_dp

      DO iset = 1, nset
         DO jset = 1, nset
            DO ipgf = 1, npgf(iset)
               starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
               DO jpgf = 1, npgf(jset)
                  startj = (jset - 1)*maxso + (jpgf - 1)*nsoset(lmax(jset))

                  DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
                     DO jso = nsoset(lmin(jset) - 1) + 1, nsoset(lmax(jset))

                        ip = o2nindex(starti + iso)
                        jp = o2nindex(startj + jso)

                        !Two component per function => 4 terms in total

                        ! take r1*a1(dir) *  r1*a1(dir)
                        fgr(1:nr, 1) = r1(1:nr, starti + iso)*r1(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a1(1:na, starti + iso, dir)*a1(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              !get the projectors
                              cpc_h = rho_atom%cpc_h(ispin)%r_coef(ip, jp)
                              cpc_s = rho_atom%cpc_s(ispin)%r_coef(ip, jp)

                              !compute contribution to tau
                              DO ir = 1, nr
                                 DO ia = 1, na
                                    tau_h(ia, ir, ispin) = tau_h(ia, ir, ispin) + 0.5_dp*cpc_h* &
                                                           fgr(ir, 1)*fga(ia, 1)

                                    tau_s(ia, ir, ispin) = tau_s(ia, ir, ispin) + 0.5_dp*cpc_s* &
                                                           fgr(ir, 1)*fga(ia, 1)
                                 END DO
                              END DO

                           END DO !ispin
                        END DO !dir

                        ! add r1*a1(dir) * r2*a2(dir)
                        fgr(1:nr, 1) = r1(1:nr, starti + iso)*r2(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a1(1:na, starti + iso, dir)*a2(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              !get the projectors
                              cpc_h = rho_atom%cpc_h(ispin)%r_coef(ip, jp)
                              cpc_s = rho_atom%cpc_s(ispin)%r_coef(ip, jp)

                              !compute contribution to tau
                              DO ir = 1, nr
                                 DO ia = 1, na
                                    tau_h(ia, ir, ispin) = tau_h(ia, ir, ispin) + 0.5_dp*cpc_h* &
                                                           fgr(ir, 1)*fga(ia, 1)

                                    tau_s(ia, ir, ispin) = tau_s(ia, ir, ispin) + 0.5_dp*cpc_s* &
                                                           fgr(ir, 1)*fga(ia, 1)
                                 END DO
                              END DO

                           END DO !ispin
                        END DO !dir

                        ! add r2*a2(dir) * V * r1*a1(dir)
                        fgr(1:nr, 1) = r2(1:nr, starti + iso)*r1(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a2(1:na, starti + iso, dir)*a1(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              !get the projectors
                              cpc_h = rho_atom%cpc_h(ispin)%r_coef(ip, jp)
                              cpc_s = rho_atom%cpc_s(ispin)%r_coef(ip, jp)

                              !compute contribution to tau
                              DO ir = 1, nr
                                 DO ia = 1, na
                                    tau_h(ia, ir, ispin) = tau_h(ia, ir, ispin) + 0.5_dp*cpc_h* &
                                                           fgr(ir, 1)*fga(ia, 1)

                                    tau_s(ia, ir, ispin) = tau_s(ia, ir, ispin) + 0.5_dp*cpc_s* &
                                                           fgr(ir, 1)*fga(ia, 1)
                                 END DO
                              END DO

                           END DO !ispin
                        END DO !dir

                        ! add the last term: r2*a2(dir) * r2*a2(dir)
                        fgr(1:nr, 1) = r2(1:nr, starti + iso)*r2(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a2(1:na, starti + iso, dir)*a2(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              !get the projectors
                              cpc_h = rho_atom%cpc_h(ispin)%r_coef(ip, jp)
                              cpc_s = rho_atom%cpc_s(ispin)%r_coef(ip, jp)

                              !compute contribution to tau
                              DO ir = 1, nr
                                 DO ia = 1, na
                                    tau_h(ia, ir, ispin) = tau_h(ia, ir, ispin) + 0.5_dp*cpc_h* &
                                                           fgr(ir, 1)*fga(ia, 1)

                                    tau_s(ia, ir, ispin) = tau_s(ia, ir, ispin) + 0.5_dp*cpc_s* &
                                                           fgr(ir, 1)*fga(ia, 1)
                                 END DO
                              END DO

                           END DO !ispin
                        END DO !dir

                     END DO !jso
                  END DO !iso

               END DO !jpgf
            END DO !ipgf
         END DO !jset
      END DO !iset

      DEALLOCATE (o2nindex)

      CALL timestop(handle)

   END SUBROUTINE calc_tau_atom

! **************************************************************************************************
!> \brief ...
!> \param grid_atom ...
!> \param nspins ...
!> \param grad_func ...
!> \param ir ...
!> \param rho_nlcc ...
!> \param rho_h ...
!> \param rho_s ...
!> \param drho_nlcc ...
!> \param drho_h ...
!> \param drho_s ...
! **************************************************************************************************
   SUBROUTINE calc_rho_nlcc(grid_atom, nspins, grad_func, &
                            ir, rho_nlcc, rho_h, rho_s, drho_nlcc, drho_h, drho_s)

      TYPE(grid_atom_type), POINTER                      :: grid_atom
      INTEGER, INTENT(IN)                                :: nspins
      LOGICAL, INTENT(IN)                                :: grad_func
      INTEGER, INTENT(IN)                                :: ir
      REAL(KIND=dp), DIMENSION(:)                        :: rho_nlcc
      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: rho_h, rho_s
      REAL(KIND=dp), DIMENSION(:)                        :: drho_nlcc
      REAL(KIND=dp), DIMENSION(:, :, :, :), POINTER      :: drho_h, drho_s

      INTEGER                                            :: ia, ispin, na
      REAL(KIND=dp)                                      :: drho, dx, dy, dz, rad, rho, urad, xsp

      CPASSERT(ASSOCIATED(rho_h))
      CPASSERT(ASSOCIATED(rho_s))
      IF (grad_func) THEN
         CPASSERT(ASSOCIATED(drho_h))
         CPASSERT(ASSOCIATED(drho_s))
      END IF

      na = grid_atom%ng_sphere
      rad = grid_atom%rad(ir)
      urad = grid_atom%oorad2l(ir, 1)

      xsp = REAL(nspins, KIND=dp)
      rho = rho_nlcc(ir)/xsp
      DO ispin = 1, nspins
         rho_h(1:na, ir, ispin) = rho_h(1:na, ir, ispin) + rho
         rho_s(1:na, ir, ispin) = rho_s(1:na, ir, ispin) + rho
      END DO ! ispin

      IF (grad_func) THEN
         drho = drho_nlcc(ir)/xsp
         DO ispin = 1, nspins
            DO ia = 1, na
               IF (grid_atom%azi(ia) == 0.0_dp) THEN
                  dx = 0.0_dp
                  dy = 0.0_dp
               ELSE
                  dx = grid_atom%sin_pol(ia)*grid_atom%sin_azi(ia)
                  dy = grid_atom%sin_pol(ia)*grid_atom%cos_azi(ia)
               END IF
               dz = grid_atom%cos_pol(ia)
               ! components of the gradient of rho1 hard
               drho_h(1, ia, ir, ispin) = drho_h(1, ia, ir, ispin) + drho*dx
               drho_h(2, ia, ir, ispin) = drho_h(2, ia, ir, ispin) + drho*dy
               drho_h(3, ia, ir, ispin) = drho_h(3, ia, ir, ispin) + drho*dz
               ! components of the gradient of rho1 soft
               drho_s(1, ia, ir, ispin) = drho_s(1, ia, ir, ispin) + drho*dx
               drho_s(2, ia, ir, ispin) = drho_s(2, ia, ir, ispin) + drho*dy
               drho_s(3, ia, ir, ispin) = drho_s(3, ia, ir, ispin) + drho*dz
               ! norm of gradient
               drho_h(4, ia, ir, ispin) = SQRT( &
                                          drho_h(1, ia, ir, ispin)*drho_h(1, ia, ir, ispin) + &
                                          drho_h(2, ia, ir, ispin)*drho_h(2, ia, ir, ispin) + &
                                          drho_h(3, ia, ir, ispin)*drho_h(3, ia, ir, ispin))

               drho_s(4, ia, ir, ispin) = SQRT( &
                                          drho_s(1, ia, ir, ispin)*drho_s(1, ia, ir, ispin) + &
                                          drho_s(2, ia, ir, ispin)*drho_s(2, ia, ir, ispin) + &
                                          drho_s(3, ia, ir, ispin)*drho_s(3, ia, ir, ispin))
            END DO ! ia
         END DO ! ispin
      END IF

   END SUBROUTINE calc_rho_nlcc

! **************************************************************************************************
!> \brief ...
!> \param vxc_h ...
!> \param vxc_s ...
!> \param int_hh ...
!> \param int_ss ...
!> \param grid_atom ...
!> \param basis_1c ...
!> \param harmonics ...
!> \param nspins ...
! **************************************************************************************************
   SUBROUTINE gaVxcgb_noGC(vxc_h, vxc_s, int_hh, int_ss, grid_atom, basis_1c, harmonics, nspins)

      REAL(dp), DIMENSION(:, :, :), POINTER              :: vxc_h, vxc_s
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: int_hh, int_ss
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      INTEGER, INTENT(IN)                                :: nspins

      CHARACTER(len=*), PARAMETER                        :: routineN = 'gaVxcgb_noGC'

      INTEGER :: handle, ia, ic, icg, ipgf1, ipgf2, ir, iset1, iset2, iso, iso1, iso2, ispin, l, &
         ld, lmax12, lmax_expansion, lmin12, m1, m2, max_iso_not0, max_iso_not0_local, max_s_harm, &
         maxl, maxso, n1, n2, na, ngau1, ngau2, nngau1, nr, nset, size1
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cg_n_list
      INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: cg_list
      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: g1, g2
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: gg, gVg_h, gVg_s, matso_h, matso_s, vx
      REAL(dp), DIMENSION(:, :), POINTER                 :: zet
      REAL(dp), DIMENSION(:, :, :), POINTER              :: my_CG

      CALL timeset(routineN, handle)

      NULLIFY (lmin, lmax, npgf, zet, my_CG)

      CALL get_gto_basis_set(gto_basis_set=basis_1c, lmax=lmax, lmin=lmin, &
                             maxso=maxso, maxl=maxl, npgf=npgf, &
                             nset=nset, zet=zet)

      nr = grid_atom%nr
      na = grid_atom%ng_sphere
      my_CG => harmonics%my_CG
      max_iso_not0 = harmonics%max_iso_not0
      lmax_expansion = indso(1, max_iso_not0)
      max_s_harm = harmonics%max_s_harm

      ALLOCATE (g1(nr), g2(nr), gg(nr, 0:2*maxl))
      ALLOCATE (gVg_h(na, 0:2*maxl), gVg_s(na, 0:2*maxl))
      ALLOCATE (matso_h(nsoset(maxl), nsoset(maxl)), &
                matso_s(nsoset(maxl), nsoset(maxl)))
      ALLOCATE (vx(na, nr))
      ALLOCATE (cg_list(2, nsoset(maxl)**2, max_s_harm), cg_n_list(max_s_harm))

      g1 = 0.0_dp
      g2 = 0.0_dp
      m1 = 0
      DO iset1 = 1, nset
         n1 = nsoset(lmax(iset1))
         m2 = 0
         DO iset2 = 1, nset
            CALL get_none0_cg_list(my_CG, lmin(iset1), lmax(iset1), lmin(iset2), lmax(iset2), &
                                   max_s_harm, lmax_expansion, cg_list, cg_n_list, max_iso_not0_local)
            CPASSERT(max_iso_not0_local .LE. max_iso_not0)

            n2 = nsoset(lmax(iset2))
            DO ipgf1 = 1, npgf(iset1)
               ngau1 = n1*(ipgf1 - 1) + m1
               size1 = nsoset(lmax(iset1)) - nsoset(lmin(iset1) - 1)
               nngau1 = nsoset(lmin(iset1) - 1) + ngau1

               g1(1:nr) = EXP(-zet(ipgf1, iset1)*grid_atom%rad2(1:nr))
               DO ipgf2 = 1, npgf(iset2)
                  ngau2 = n2*(ipgf2 - 1) + m2

                  g2(1:nr) = EXP(-zet(ipgf2, iset2)*grid_atom%rad2(1:nr))
                  lmin12 = lmin(iset1) + lmin(iset2)
                  lmax12 = lmax(iset1) + lmax(iset2)

                  ! reduce expansion local densities
                  IF (lmin12 .LE. lmax_expansion) THEN

                     gg = 0.0_dp
                     IF (lmin12 == 0) THEN
                        gg(1:nr, lmin12) = g1(1:nr)*g2(1:nr)
                     ELSE
                        gg(1:nr, lmin12) = grid_atom%rad2l(1:nr, lmin12)*g1(1:nr)*g2(1:nr)
                     END IF

                     ! limit the expansion of the local densities to a max L
                     IF (lmax12 .GT. lmax_expansion) lmax12 = lmax_expansion

                     DO l = lmin12 + 1, lmax12
                        gg(1:nr, l) = grid_atom%rad(1:nr)*gg(:, l - 1)
                     END DO

                     DO ispin = 1, nspins
                        ld = lmax12 + 1
                        DO ir = 1, nr
                           vx(1:na, ir) = vxc_h(1:na, ir, ispin)
                        END DO
                        CALL dgemm('N', 'N', na, ld, nr, 1.0_dp, vx(1:na, 1:nr), na, &
                                   gg(1:nr, 0:lmax12), nr, 0.0_dp, gVg_h(1:na, 0:lmax12), na)
                        DO ir = 1, nr
                           vx(1:na, ir) = vxc_s(1:na, ir, ispin)
                        END DO
                        CALL dgemm('N', 'N', na, ld, nr, 1.0_dp, vx(1:na, 1:nr), na, &
                                   gg(1:nr, 0:lmax12), nr, 0.0_dp, gVg_s(1:na, 0:lmax12), na)

                        matso_h = 0.0_dp
                        matso_s = 0.0_dp
                        DO iso = 1, max_iso_not0_local
                           DO icg = 1, cg_n_list(iso)
                              iso1 = cg_list(1, icg, iso)
                              iso2 = cg_list(2, icg, iso)
                              l = indso(1, iso1) + indso(1, iso2)

                              CPASSERT(l <= lmax_expansion)
                              DO ia = 1, na
                                 matso_h(iso1, iso2) = matso_h(iso1, iso2) + &
                                                       gVg_h(ia, l)* &
                                                       my_CG(iso1, iso2, iso)* &
                                                       harmonics%slm(ia, iso)
                                 matso_s(iso1, iso2) = matso_s(iso1, iso2) + &
                                                       gVg_s(ia, l)* &
                                                       my_CG(iso1, iso2, iso)* &
                                                       harmonics%slm(ia, iso)
                              END DO
                           END DO
                        END DO

                        ! Write in the global matrix
                        DO ic = nsoset(lmin(iset2) - 1) + 1, nsoset(lmax(iset2))
                           iso1 = nsoset(lmin(iset1) - 1) + 1
                           iso2 = ngau2 + ic
                           CALL daxpy(size1, 1.0_dp, matso_h(iso1, ic), 1, &
                                      int_hh(ispin)%r_coef(nngau1 + 1, iso2), 1)
                           CALL daxpy(size1, 1.0_dp, matso_s(iso1, ic), 1, &
                                      int_ss(ispin)%r_coef(nngau1 + 1, iso2), 1)
                        END DO

                     END DO ! ispin

                  END IF ! lmax_expansion

               END DO ! ipfg2
            END DO ! ipfg1
            m2 = m2 + maxso
         END DO ! iset2
         m1 = m1 + maxso
      END DO ! iset1

      DEALLOCATE (g1, g2, gg, matso_h, matso_s, gVg_s, gVg_h, vx)

      DEALLOCATE (cg_list, cg_n_list)

      CALL timestop(handle)

   END SUBROUTINE gaVxcgb_noGC

! **************************************************************************************************
!> \brief ...
!> \param vxc_h ...
!> \param vxc_s ...
!> \param vxg_h ...
!> \param vxg_s ...
!> \param int_hh ...
!> \param int_ss ...
!> \param grid_atom ...
!> \param basis_1c ...
!> \param harmonics ...
!> \param nspins ...
! **************************************************************************************************
   SUBROUTINE gaVxcgb_GC(vxc_h, vxc_s, vxg_h, vxg_s, int_hh, int_ss, &
                         grid_atom, basis_1c, harmonics, nspins)

      REAL(dp), DIMENSION(:, :, :), POINTER              :: vxc_h, vxc_s
      REAL(dp), DIMENSION(:, :, :, :), POINTER           :: vxg_h, vxg_s
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: int_hh, int_ss
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      INTEGER, INTENT(IN)                                :: nspins

      CHARACTER(len=*), PARAMETER                        :: routineN = 'gaVxcgb_GC'

      INTEGER :: dmax_iso_not0_local, handle, ia, ic, icg, ipgf1, ipgf2, ir, iset1, iset2, iso, &
         iso1, iso2, ispin, l, lmax12, lmax_expansion, lmin12, m1, m2, max_iso_not0, &
         max_iso_not0_local, max_s_harm, maxl, maxso, n1, n2, na, ngau1, ngau2, nngau1, nr, nset, &
         size1
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cg_n_list, dcg_n_list
      INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: cg_list, dcg_list
      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
      REAL(dp)                                           :: urad
      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: g1, g2
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: dgg, gg, gVXCg_h, gVXCg_s, matso_h, &
                                                            matso_s
      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: gVXGg_h, gVXGg_s
      REAL(dp), DIMENSION(:, :), POINTER                 :: zet
      REAL(dp), DIMENSION(:, :, :), POINTER              :: my_CG
      REAL(dp), DIMENSION(:, :, :, :), POINTER           :: my_CG_dxyz

      CALL timeset(routineN, handle)

      NULLIFY (lmin, lmax, npgf, zet, my_CG, my_CG_dxyz)

      CALL get_gto_basis_set(gto_basis_set=basis_1c, lmax=lmax, lmin=lmin, &
                             maxso=maxso, maxl=maxl, npgf=npgf, &
                             nset=nset, zet=zet)

      nr = grid_atom%nr
      na = grid_atom%ng_sphere
      my_CG => harmonics%my_CG
      my_CG_dxyz => harmonics%my_CG_dxyz
      max_iso_not0 = harmonics%max_iso_not0
      lmax_expansion = indso(1, max_iso_not0)
      max_s_harm = harmonics%max_s_harm

      ALLOCATE (g1(nr), g2(nr), gg(nr, 0:2*maxl), dgg(nr, 0:2*maxl))
      ALLOCATE (gVXCg_h(na, 0:2*maxl), gVXCg_s(na, 0:2*maxl))
      ALLOCATE (gVXGg_h(3, na, 0:2*maxl), gVXGg_s(3, na, 0:2*maxl))
      ALLOCATE (cg_list(2, nsoset(maxl)**2, max_s_harm), cg_n_list(max_s_harm), &
                dcg_list(2, nsoset(maxl)**2, max_s_harm), dcg_n_list(max_s_harm))

      ALLOCATE (matso_h(nsoset(maxl), nsoset(maxl)), &
                matso_s(nsoset(maxl), nsoset(maxl)))

      DO ispin = 1, nspins

         g1 = 0.0_dp
         g2 = 0.0_dp
         m1 = 0
         DO iset1 = 1, nset
            n1 = nsoset(lmax(iset1))
            m2 = 0
            DO iset2 = 1, nset
               CALL get_none0_cg_list(my_CG, lmin(iset1), lmax(iset1), lmin(iset2), lmax(iset2), &
                                      max_s_harm, lmax_expansion, cg_list, cg_n_list, max_iso_not0_local)
               CPASSERT(max_iso_not0_local .LE. max_iso_not0)
               CALL get_none0_cg_list(my_CG_dxyz, lmin(iset1), lmax(iset1), lmin(iset2), lmax(iset2), &
                                      max_s_harm, lmax_expansion, dcg_list, dcg_n_list, dmax_iso_not0_local)

               n2 = nsoset(lmax(iset2))
               DO ipgf1 = 1, npgf(iset1)
                  ngau1 = n1*(ipgf1 - 1) + m1
                  size1 = nsoset(lmax(iset1)) - nsoset(lmin(iset1) - 1)
                  nngau1 = nsoset(lmin(iset1) - 1) + ngau1

                  g1(1:nr) = EXP(-zet(ipgf1, iset1)*grid_atom%rad2(1:nr))
                  DO ipgf2 = 1, npgf(iset2)
                     ngau2 = n2*(ipgf2 - 1) + m2

                     g2(1:nr) = EXP(-zet(ipgf2, iset2)*grid_atom%rad2(1:nr))
                     lmin12 = lmin(iset1) + lmin(iset2)
                     lmax12 = lmax(iset1) + lmax(iset2)

                     !test reduce expansion local densities
                     IF (lmin12 .LE. lmax_expansion) THEN

                        gg = 0.0_dp
                        dgg = 0.0_dp

                        IF (lmin12 == 0) THEN
                           gg(1:nr, lmin12) = g1(1:nr)*g2(1:nr)
                        ELSE
                           gg(1:nr, lmin12) = grid_atom%rad2l(1:nr, lmin12)*g1(1:nr)*g2(1:nr)
                        END IF

                        !test reduce expansion local densities
                        IF (lmax12 .GT. lmax_expansion) lmax12 = lmax_expansion

                        DO l = lmin12 + 1, lmax12
                           gg(1:nr, l) = grid_atom%rad(1:nr)*gg(:, l - 1)
                           dgg(1:nr, l - 1) = dgg(1:nr, l - 1) - 2.0_dp*(zet(ipgf1, iset1) + &
                                                                         zet(ipgf2, iset2))*gg(1:nr, l)
                        END DO
                        dgg(1:nr, lmax12) = dgg(1:nr, lmax12) - 2.0_dp*(zet(ipgf1, iset1) + &
                                                                        zet(ipgf2, iset2))*grid_atom%rad(1:nr)* &
                                            gg(1:nr, lmax12)

                        gVXCg_h = 0.0_dp
                        gVXCg_s = 0.0_dp
                        gVXGg_h = 0.0_dp
                        gVXGg_s = 0.0_dp

                        ! Cross Term
                        DO l = lmin12, lmax12
                           DO ia = 1, na
                              DO ir = 1, nr
                                 gVXCg_h(ia, l) = gVXCg_h(ia, l) + &
                                                  gg(ir, l)*vxc_h(ia, ir, ispin) + &
                                                  dgg(ir, l)* &
                                                  (vxg_h(1, ia, ir, ispin)*harmonics%a(1, ia) + &
                                                   vxg_h(2, ia, ir, ispin)*harmonics%a(2, ia) + &
                                                   vxg_h(3, ia, ir, ispin)*harmonics%a(3, ia))

                                 gVXCg_s(ia, l) = gVXCg_s(ia, l) + &
                                                  gg(ir, l)*vxc_s(ia, ir, ispin) + &
                                                  dgg(ir, l)* &
                                                  (vxg_s(1, ia, ir, ispin)*harmonics%a(1, ia) + &
                                                   vxg_s(2, ia, ir, ispin)*harmonics%a(2, ia) + &
                                                   vxg_s(3, ia, ir, ispin)*harmonics%a(3, ia))

                                 urad = grid_atom%oorad2l(ir, 1)

                                 gVXGg_h(1, ia, l) = gVXGg_h(1, ia, l) + &
                                                     vxg_h(1, ia, ir, ispin)* &
                                                     gg(ir, l)*urad

                                 gVXGg_h(2, ia, l) = gVXGg_h(2, ia, l) + &
                                                     vxg_h(2, ia, ir, ispin)* &
                                                     gg(ir, l)*urad

                                 gVXGg_h(3, ia, l) = gVXGg_h(3, ia, l) + &
                                                     vxg_h(3, ia, ir, ispin)* &
                                                     gg(ir, l)*urad

                                 gVXGg_s(1, ia, l) = gVXGg_s(1, ia, l) + &
                                                     vxg_s(1, ia, ir, ispin)* &
                                                     gg(ir, l)*urad

                                 gVXGg_s(2, ia, l) = gVXGg_s(2, ia, l) + &
                                                     vxg_s(2, ia, ir, ispin)* &
                                                     gg(ir, l)*urad

                                 gVXGg_s(3, ia, l) = gVXGg_s(3, ia, l) + &
                                                     vxg_s(3, ia, ir, ispin)* &
                                                     gg(ir, l)*urad

                              END DO ! ir
                           END DO ! ia
                        END DO ! l

                        matso_h = 0.0_dp
                        matso_s = 0.0_dp
                        DO iso = 1, max_iso_not0_local
                           DO icg = 1, cg_n_list(iso)
                              iso1 = cg_list(1, icg, iso)
                              iso2 = cg_list(2, icg, iso)

                              l = indso(1, iso1) + indso(1, iso2)

                              !test reduce expansion local densities
                              CPASSERT(l <= lmax_expansion)
                              DO ia = 1, na
                                 matso_h(iso1, iso2) = matso_h(iso1, iso2) + &
                                                       gVXCg_h(ia, l)* &
                                                       harmonics%slm(ia, iso)* &
                                                       my_CG(iso1, iso2, iso)
                                 matso_s(iso1, iso2) = matso_s(iso1, iso2) + &
                                                       gVXCg_s(ia, l)* &
                                                       harmonics%slm(ia, iso)* &
                                                       my_CG(iso1, iso2, iso)
                              END DO ! ia

                              !test reduce expansion local densities

                           END DO

                        END DO ! iso

                        DO iso = 1, dmax_iso_not0_local
                           DO icg = 1, dcg_n_list(iso)
                              iso1 = dcg_list(1, icg, iso)
                              iso2 = dcg_list(2, icg, iso)

                              l = indso(1, iso1) + indso(1, iso2)
                              !test reduce expansion local densities
                              CPASSERT(l <= lmax_expansion)
                              DO ia = 1, na
                                 matso_h(iso1, iso2) = matso_h(iso1, iso2) + &
                                                       (gVXGg_h(1, ia, l)*my_CG_dxyz(1, iso1, iso2, iso) + &
                                                        gVXGg_h(2, ia, l)*my_CG_dxyz(2, iso1, iso2, iso) + &
                                                        gVXGg_h(3, ia, l)*my_CG_dxyz(3, iso1, iso2, iso))* &
                                                       harmonics%slm(ia, iso)

                                 matso_s(iso1, iso2) = matso_s(iso1, iso2) + &
                                                       (gVXGg_s(1, ia, l)*my_CG_dxyz(1, iso1, iso2, iso) + &
                                                        gVXGg_s(2, ia, l)*my_CG_dxyz(2, iso1, iso2, iso) + &
                                                        gVXGg_s(3, ia, l)*my_CG_dxyz(3, iso1, iso2, iso))* &
                                                       harmonics%slm(ia, iso)

                              END DO ! ia

                              !test reduce expansion local densities

                           END DO ! icg
                        END DO ! iso
                        !test reduce expansion local densities
                     END IF ! lmax_expansion

                     !  Write in the global matrix
                     DO ic = nsoset(lmin(iset2) - 1) + 1, nsoset(lmax(iset2))
                        iso1 = nsoset(lmin(iset1) - 1) + 1
                        iso2 = ngau2 + ic
                        CALL daxpy(size1, 1.0_dp, matso_h(iso1, ic), 1, &
                                   int_hh(ispin)%r_coef(nngau1 + 1, iso2), 1)
                        CALL daxpy(size1, 1.0_dp, matso_s(iso1, ic), 1, &
                                   int_ss(ispin)%r_coef(nngau1 + 1, iso2), 1)
                     END DO

                  END DO ! ipfg2
               END DO ! ipfg1
               m2 = m2 + maxso
            END DO ! iset2
            m1 = m1 + maxso
         END DO ! iset1
      END DO ! ispin

      DEALLOCATE (g1, g2, gg, dgg, matso_h, matso_s, gVXCg_h, gVXCg_s, gVXGg_h, gVXGg_s)
      DEALLOCATE (cg_list, cg_n_list, dcg_list, dcg_n_list)

      CALL timestop(handle)

   END SUBROUTINE gaVxcgb_GC

! **************************************************************************************************
!> \brief Integrates 0.5 * grad_ga .dot. (V_tau * grad_gb) on the atomic grid for meta-GGA
!> \param vtau_h the har tau potential
!> \param vtau_s the soft tau potential
!> \param int_hh ...
!> \param int_ss ...
!> \param grid_atom ...
!> \param basis_1c ...
!> \param harmonics ...
!> \param nspins ...
!> \note This is a rewrite to correct meta-GGA GAPW bug. This is more brute force than the original
!>       but makes sure that no corner is cut in terms of accuracy (A. Bussy)
! **************************************************************************************************
   SUBROUTINE dgaVtaudgb(vtau_h, vtau_s, int_hh, int_ss, &
                         grid_atom, basis_1c, harmonics, nspins)

      REAL(dp), DIMENSION(:, :, :), POINTER              :: vtau_h, vtau_s
      TYPE(rho_atom_coeff), DIMENSION(:), POINTER        :: int_hh, int_ss
      TYPE(grid_atom_type), POINTER                      :: grid_atom
      TYPE(gto_basis_set_type), POINTER                  :: basis_1c
      TYPE(harmonics_atom_type), POINTER                 :: harmonics
      INTEGER, INTENT(IN)                                :: nspins

      CHARACTER(len=*), PARAMETER                        :: routineN = 'dgaVtaudgb'

      INTEGER                                            :: dir, handle, ipgf, iset, iso, ispin, &
                                                            jpgf, jset, jso, l, maxso, na, nr, &
                                                            nset, starti, startj
      INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fga, fgr, r1, r2, work
      REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: a1, a2, intso_h, intso_s
      REAL(dp), DIMENSION(:, :), POINTER                 :: slm, zet
      REAL(dp), DIMENSION(:, :, :), POINTER              :: dslm_dxyz

      CALL timeset(routineN, handle)

      NULLIFY (zet, slm, dslm_dxyz, lmax, lmin, npgf)

      CALL get_gto_basis_set(gto_basis_set=basis_1c, lmax=lmax, lmin=lmin, &
                             maxso=maxso, npgf=npgf, nset=nset, zet=zet)

      na = grid_atom%ng_sphere
      nr = grid_atom%nr

      slm => harmonics%slm
      dslm_dxyz => harmonics%dslm_dxyz

      ! Separate the functions into purely r and purely angular parts and precompute them all
      ! Not memory intensive since 1D arrays
      ALLOCATE (a1(na, nset*maxso, 3), a2(na, nset*maxso, 3))
      ALLOCATE (r1(nr, nset*maxso), r2(nr, nset*maxso))
      a1 = 0.0_dp; a2 = 0.0_dp
      r1 = 0.0_dp; r2 = 0.0_dp

      DO iset = 1, nset
         DO ipgf = 1, npgf(iset)
            starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
            DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
               l = indso(1, iso)

               ! The x derivative of the spherical orbital, divided in angular and radial parts
               ! Two of each are needed because d/dx(r^l Y_lm) * exp(-al*r^2) + r^l Y_lm *  d/dx(exp-al*r^2)

               ! the purely radial part of d/dx(r^l Y_lm) * exp(-al*r^2) (same for y,z)
               r1(1:nr, starti + iso) = grid_atom%rad(1:nr)**(l - 1)*EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))

               ! the purely radial part of r^l Y_lm * d/dx(exp-al*r^2) (same for y,z)
               r2(1:nr, starti + iso) = -2.0_dp*zet(ipgf, iset)*grid_atom%rad(1:nr)**(l + 1) &
                                        *EXP(-zet(ipgf, iset)*grid_atom%rad2(1:nr))

               DO dir = 1, 3
                  ! the purely angular part of d/dx(r^l Y_lm) * exp(-al*r^2)
                  a1(1:na, starti + iso, dir) = dslm_dxyz(dir, 1:na, iso)

                  ! the purely angular part of r^l Y_lm * d/dx(exp-al*r^2)
                  a2(1:na, starti + iso, dir) = harmonics%a(dir, 1:na)*slm(1:na, iso)
               END DO

            END DO !iso
         END DO !ipgf
      END DO !iset

      !Do the integration in terms of matrix-matrix multiplications
      ALLOCATE (intso_h(nset*maxso, nset*maxso, nspins))
      ALLOCATE (intso_s(nset*maxso, nset*maxso, nspins))
      intso_h = 0.0_dp; intso_s = 0.0_dp

      ALLOCATE (fga(na, 1))
      ALLOCATE (fgr(nr, 1))
      ALLOCATE (work(na, 1))
      fga = 0.0_dp; fgr = 0.0_dp; work = 0.0_dp

      DO iset = 1, nset
         DO jset = 1, nset
            DO ipgf = 1, npgf(iset)
               starti = (iset - 1)*maxso + (ipgf - 1)*nsoset(lmax(iset))
               DO jpgf = 1, npgf(jset)
                  startj = (jset - 1)*maxso + (jpgf - 1)*nsoset(lmax(jset))

                  DO iso = nsoset(lmin(iset) - 1) + 1, nsoset(lmax(iset))
                     DO jso = nsoset(lmin(jset) - 1) + 1, nsoset(lmax(jset))

                        !Two component per function => 4 terms in total

                        ! take 0.5*r1*a1(dir) * V * r1*a1(dir)
                        fgr(1:nr, 1) = r1(1:nr, starti + iso)*r1(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a1(1:na, starti + iso, dir)*a1(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_h(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_h(starti + iso:, startj + jso, ispin), 1)

                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_s(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_s(starti + iso:, startj + jso, ispin), 1)
                           END DO
                        END DO !dir

                        ! add 0.5*r1*a1(dir) * V * r2*a2(dir)
                        fgr(1:nr, 1) = r1(1:nr, starti + iso)*r2(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a1(1:na, starti + iso, dir)*a2(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_h(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_h(starti + iso:, startj + jso, ispin), 1)

                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_s(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_s(starti + iso:, startj + jso, ispin), 1)
                           END DO
                        END DO !dir

                        ! add 0.5*r2*a2(dir) * V * r1*a1(dir)
                        fgr(1:nr, 1) = r2(1:nr, starti + iso)*r1(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a2(1:na, starti + iso, dir)*a1(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_h(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_h(starti + iso:, startj + jso, ispin), 1)

                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_s(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_s(starti + iso:, startj + jso, ispin), 1)
                           END DO
                        END DO !dir

                        ! add the last term: 0.5*r2*a2(dir) * V * r2*a2(dir)
                        fgr(1:nr, 1) = r2(1:nr, starti + iso)*r2(1:nr, startj + jso)
                        DO dir = 1, 3
                           fga(1:na, 1) = a2(1:na, starti + iso, dir)*a2(1:na, startj + jso, dir)

                           DO ispin = 1, nspins
                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_h(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_h(starti + iso:, startj + jso, ispin), 1)

                              CALL dgemm('N', 'N', na, 1, nr, 0.5_dp, vtau_s(:, :, ispin), na, fgr, &
                                         nr, 0.0_dp, work, na)
                              CALL dgemm('T', 'N', 1, 1, na, 1.0_dp, work, na, fga, na, 1.0_dp, &
                                         intso_s(starti + iso:, startj + jso, ispin), 1)
                           END DO
                        END DO !dir

                     END DO !jso
                  END DO !iso

               END DO !jpgf
            END DO !ipgf
         END DO !jset
      END DO !iset

      ! Put the integrals in the rho_atom data structure
      DO ispin = 1, nspins
         int_hh(ispin)%r_coef(:, :) = int_hh(ispin)%r_coef(:, :) + intso_h(:, :, ispin)
         int_ss(ispin)%r_coef(:, :) = int_ss(ispin)%r_coef(:, :) + intso_s(:, :, ispin)
      END DO

      CALL timestop(handle)

   END SUBROUTINE dgaVtaudgb

END MODULE qs_vxc_atom
