diff --git a/.github/workflows/self-hosted-ci.yml b/.github/workflows/self-hosted-ci.yml
index 87ce7db94..233c549ed 100644
--- a/.github/workflows/self-hosted-ci.yml
+++ b/.github/workflows/self-hosted-ci.yml
@@ -6,6 +6,7 @@ jobs:
   CI:
     runs-on: daint
     strategy:
+      fail-fast: false
       matrix:
         include:
         - config_name: pgi_default_gpu
@@ -64,7 +65,7 @@ jobs:
         source compiler_modules
         export RRTMGP_ROOT=$PWD
         export FC=ftn
-        # Compiler needs more temporary space than normally available 
+        # Compiler needs more temporary space than normally available
         mkdir -p $PWD/tmp
         export TMPDIR=$PWD/tmp
         make clean
diff --git a/Compiler-flags.md b/Compiler-flags.md
index cef9c3f42..ff763d7fc 100644
--- a/Compiler-flags.md
+++ b/Compiler-flags.md
@@ -27,4 +27,4 @@ root of the RTE+RRTMGP installation.
 ### Debugging flags
 `FCFLAGS: "-g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03  -Mpreprocess"`
 ### Optimization flags:  
-`FCFLAGS: "-m64 -O3 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132"`
+`FCFLAGS: "-g -O3 -fast -Minfo -Mallocatable=03 -Mpreprocess"`
diff --git a/examples/mo_load_coefficients.F90 b/examples/mo_load_coefficients.F90
index 6f4a6dd17..f40048c0c 100644
--- a/examples/mo_load_coefficients.F90
+++ b/examples/mo_load_coefficients.F90
@@ -35,6 +35,7 @@ subroutine stop_on_err(msg)
     use iso_fortran_env, only : error_unit
     character(len=*), intent(in) :: msg
 
+
     if(msg /= "") then
       write(error_unit, *) msg
       error stop 1
diff --git a/extensions/mo_rrtmgp_clr_all_sky.F90 b/extensions/mo_rrtmgp_clr_all_sky.F90
index 203e05384..755681cbb 100644
--- a/extensions/mo_rrtmgp_clr_all_sky.F90
+++ b/extensions/mo_rrtmgp_clr_all_sky.F90
@@ -176,12 +176,13 @@ function rte_lw(k_dist, gas_concs, p_lay, t_lay, p_lev,    &
                             inc_flux, n_gauss_angles)
 
     call sources%finalize()
+    call optical_props%finalize()
   end function rte_lw
   ! --------------------------------------------------
   function rte_sw(k_dist, gas_concs, p_lay, t_lay, p_lev, &
                                  mu0, sfc_alb_dir, sfc_alb_dif, cloud_props, &
                                  allsky_fluxes, clrsky_fluxes,           &
-                                 aer_props, col_dry, inc_flux, tsi_scaling) result(error_msg)
+                                 aer_props, col_dry, inc_flux) result(error_msg)
     class(ty_gas_optics),              intent(in   ) :: k_dist       !< derived type with spectral information
     type(ty_gas_concs),                intent(in   ) :: gas_concs    !< derived type encapsulating gas concentrations
     real(wp), dimension(:,:),          intent(in   ) :: p_lay, t_lay !< pressure [Pa], temperature [K] at layer centers (ncol,nlay)
@@ -198,7 +199,6 @@ function rte_sw(k_dist, gas_concs, p_lay, t_lay, p_lev, &
     real(wp), dimension(:,:), &
               optional,       intent(in ) :: col_dry, &  !< Molecular number density (ncol, nlay)
                                              inc_flux    !< incident flux at domain top [W/m2] (ncol, ngpts)
-    real(wp), optional,       intent(in ) :: tsi_scaling !< Optional scaling for total solar irradiance
 
     character(len=128)                    :: error_msg
     ! --------------------------------
@@ -227,9 +227,6 @@ function rte_sw(k_dist, gas_concs, p_lay, t_lay, p_lev, &
     ! ------------------------------------------------------------------------------------
     !  Error checking
     !
-    if(present(inc_flux) .and. present(tsi_scaling)) &
-      error_msg = "rrtmpg_sw: only one of inc_flux, tsi_scaling may be supplied."
-
     if(present(aer_props)) then
       if(any([aer_props%get_ncol(), &
               aer_props%get_nlay()] /= [ncol, nlay])) &
@@ -238,11 +235,6 @@ function rte_sw(k_dist, gas_concs, p_lay, t_lay, p_lev, &
         error_msg = "rrtmpg_sw: aerosol properties inconsistently sized"
     end if
 
-    if(present(tsi_scaling)) then
-      if(tsi_scaling <= 0._wp) &
-        error_msg = "rrtmpg_sw: tsi_scaling is < 0"
-    end if
-
     if(present(inc_flux)) then
       if(any([size(inc_flux, 1), &
               size(inc_flux, 2)] /= [ncol, ngpt])) &
@@ -291,7 +283,6 @@ function rte_sw(k_dist, gas_concs, p_lay, t_lay, p_lev, &
     ! If users have supplied an incident flux, use that
     !
     if(present(inc_flux))    toa_flux(:,:) = inc_flux(:,:)
-    if(present(tsi_scaling)) toa_flux(:,:) = toa_flux(:,:) * tsi_scaling
     ! ----------------------------------------------------
     ! Clear sky is gases + aerosols (if they're supplied)
     !
@@ -315,6 +306,7 @@ function rte_sw(k_dist, gas_concs, p_lay, t_lay, p_lev, &
                                sfc_alb_dir, sfc_alb_dif,        &
                                allsky_fluxes)
 
+    call optical_props%finalize()
   end function rte_sw
 
 end module mo_rrtmgp_clr_all_sky
diff --git a/rrtmgp/data/rrtmgp-data-lw-g128-210809.nc b/rrtmgp/data/rrtmgp-data-lw-g128-210809.nc
index e09f750ac..cf117a3f0 100644
Binary files a/rrtmgp/data/rrtmgp-data-lw-g128-210809.nc and b/rrtmgp/data/rrtmgp-data-lw-g128-210809.nc differ
diff --git a/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc b/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc
index 991ce908a..c66d3a545 100644
Binary files a/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc and b/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc differ
diff --git a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90
index 093aa556e..71ec4be27 100644
--- a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90
+++ b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90
@@ -50,10 +50,10 @@ subroutine interpolation( &
     ! outputs
     integer,     dimension(ncol,nlay), intent(out) :: jtemp, jpress
     logical(wl), dimension(ncol,nlay), intent(out) :: tropo
-    integer,     dimension(2,    nflav,ncol,nlay), intent(out) :: jeta
-    real(wp),    dimension(2,    nflav,ncol,nlay), intent(out) :: col_mix
-    real(wp),    dimension(2,2,2,nflav,ncol,nlay), intent(out) :: fmajor
-    real(wp),    dimension(2,2,  nflav,ncol,nlay), intent(out) :: fminor
+    integer,     dimension(2,    ncol,nlay,nflav), intent(out) :: jeta
+    real(wp),    dimension(2,    ncol,nlay,nflav), intent(out) :: col_mix
+    real(wp),    dimension(2,2,2,ncol,nlay,nflav), intent(out) :: fmajor
+    real(wp),    dimension(2,2,  ncol,nlay,nflav), intent(out) :: fminor
     ! -----------------
     ! local
     real(wp), dimension(ncol,nlay) :: ftemp, fpress ! interpolation fraction for temperature, pressure
@@ -98,10 +98,10 @@ subroutine interpolation( &
     ! thinks it isn't present.
     !$acc parallel loop gang vector collapse(4) default(none) private(igases) present(vmr_ref)
     !$omp target teams distribute parallel do simd collapse(4) private(igases)
-    do ilay = 1, nlay
-      do icol = 1, ncol
+    do iflav = 1, nflav
+      do ilay = 1, nlay
         ! loop over implemented combinations of major species
-        do iflav = 1, nflav
+        do icol = 1, ncol
           do itemp = 1, 2
             igases(:) = flavor(:,iflav)
             ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
@@ -111,26 +111,26 @@ subroutine interpolation( &
             !  associated interpolation index and factors
             ratio_eta_half = vmr_ref(itropo,igases(1),(jtemp(icol,ilay)+itemp-1)) / &
                              vmr_ref(itropo,igases(2),(jtemp(icol,ilay)+itemp-1))
-            col_mix(itemp,iflav,icol,ilay) = col_gas(icol,ilay,igases(1)) + ratio_eta_half * col_gas(icol,ilay,igases(2))
-            eta = merge(col_gas(icol,ilay,igases(1)) / col_mix(itemp,iflav,icol,ilay), 0.5_wp, &
-                        col_mix(itemp,iflav,icol,ilay) > 2._wp * tiny(col_mix))
+            col_mix(itemp,icol,ilay,iflav) = col_gas(icol,ilay,igases(1)) + ratio_eta_half * col_gas(icol,ilay,igases(2))
+            eta = merge(col_gas(icol,ilay,igases(1)) / col_mix(itemp,icol,ilay,iflav), 0.5_wp, &
+                        col_mix(itemp,icol,ilay,iflav) > 2._wp * tiny(col_mix))
             loceta = eta * float(neta-1)
-            jeta(itemp,iflav,icol,ilay) = min(int(loceta)+1, neta-1)
+            jeta(itemp,icol,ilay,iflav) = min(int(loceta)+1, neta-1)
             feta = mod(loceta, 1.0_wp)
             ! compute interpolation fractions needed for minor species
             ! ftemp_term = (1._wp-ftemp(icol,ilay)) for itemp = 1, ftemp(icol,ilay) for itemp=2
             ftemp_term = (real(2-itemp, wp) + real(2*itemp-3, wp) * ftemp(icol,ilay))
-            fminor(1,itemp,iflav,icol,ilay) = (1._wp-feta) * ftemp_term
-            fminor(2,itemp,iflav,icol,ilay) =        feta  * ftemp_term
+            fminor(1,itemp,icol,ilay,iflav) = (1._wp-feta) * ftemp_term
+            fminor(2,itemp,icol,ilay,iflav) =        feta  * ftemp_term
             ! compute interpolation fractions needed for major species
-            fmajor(1,1,itemp,iflav,icol,ilay) = (1._wp-fpress(icol,ilay)) * fminor(1,itemp,iflav,icol,ilay)
-            fmajor(2,1,itemp,iflav,icol,ilay) = (1._wp-fpress(icol,ilay)) * fminor(2,itemp,iflav,icol,ilay)
-            fmajor(1,2,itemp,iflav,icol,ilay) =        fpress(icol,ilay)  * fminor(1,itemp,iflav,icol,ilay)
-            fmajor(2,2,itemp,iflav,icol,ilay) =        fpress(icol,ilay)  * fminor(2,itemp,iflav,icol,ilay)
+            fmajor(1,1,itemp,icol,ilay,iflav) = (1._wp-fpress(icol,ilay)) * fminor(1,itemp,icol,ilay,iflav)
+            fmajor(2,1,itemp,icol,ilay,iflav) = (1._wp-fpress(icol,ilay)) * fminor(2,itemp,icol,ilay,iflav)
+            fmajor(1,2,itemp,icol,ilay,iflav) =        fpress(icol,ilay)  * fminor(1,itemp,icol,ilay,iflav)
+            fmajor(2,2,itemp,icol,ilay,iflav) =        fpress(icol,ilay)  * fminor(2,itemp,icol,ilay,iflav)
           end do ! reference temperatures
-        end do ! iflav
-      end do ! icol,ilay
-    end do
+        end do ! icol
+      end do ! ilay
+    end do ! iflav
 
     !$acc end data
     !$omp end target data
@@ -179,9 +179,9 @@ subroutine compute_tau_absorption(                &
     ! inputs from object
     integer,     dimension(2,ngpt),                  intent(in) :: gpoint_flavor
     integer,     dimension(2,nbnd),                  intent(in) :: band_lims_gpt
-    real(wp),    dimension(ngpt,neta,npres+1,ntemp), intent(in) :: kmajor
-    real(wp),    dimension(nminorklower,neta,ntemp), intent(in) :: kminor_lower
-    real(wp),    dimension(nminorkupper,neta,ntemp), intent(in) :: kminor_upper
+    real(wp),    dimension(ntemp,neta,npres+1,ngpt), intent(in) :: kmajor
+    real(wp),    dimension(ntemp,neta,nminorklower), intent(in) :: kminor_lower
+    real(wp),    dimension(ntemp,neta,nminorkupper), intent(in) :: kminor_upper
     integer,     dimension(2,nminorlower),           intent(in) :: minor_limits_gpt_lower
     integer,     dimension(2,nminorupper),           intent(in) :: minor_limits_gpt_upper
     logical(wl), dimension(  nminorlower),           intent(in) :: minor_scales_with_density_lower
@@ -197,17 +197,17 @@ subroutine compute_tau_absorption(                &
     logical(wl), dimension(ncol,nlay),               intent(in) :: tropo
     ! ---------------------
     ! inputs from profile or parent function
-    real(wp), dimension(2,    nflav,ncol,nlay       ), intent(in) :: col_mix
-    real(wp), dimension(2,2,2,nflav,ncol,nlay       ), intent(in) :: fmajor
-    real(wp), dimension(2,2,  nflav,ncol,nlay       ), intent(in) :: fminor
+    real(wp), dimension(2,    ncol,nlay,nflav       ), intent(in) :: col_mix
+    real(wp), dimension(2,2,2,ncol,nlay,nflav       ), intent(in) :: fmajor
+    real(wp), dimension(2,2,  ncol,nlay,nflav       ), intent(in) :: fminor
     real(wp), dimension(            ncol,nlay       ), intent(in) :: play, tlay      ! pressure and temperature
     real(wp), dimension(            ncol,nlay,0:ngas), intent(in) :: col_gas
-    integer,  dimension(2,    nflav,ncol,nlay       ), intent(in) :: jeta
+    integer,  dimension(2,    ncol,nlay,nflav       ), intent(in) :: jeta
     integer,  dimension(            ncol,nlay       ), intent(in) :: jtemp
     integer,  dimension(            ncol,nlay       ), intent(in) :: jpress
     ! ---------------------
     ! output - optical depth
-    real(wp), dimension(ngpt,nlay,ncol), intent(inout) :: tau
+    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau
     ! ---------------------
     ! Local variables
     !
@@ -343,17 +343,17 @@ subroutine gas_optical_depths_major(ncol,nlay,nbnd,ngpt,&
     ! inputs from object
     integer,  dimension(2,ngpt),  intent(in) :: gpoint_flavor
     integer,  dimension(2,nbnd),  intent(in) :: band_lims_gpt ! start and end g-point for each band
-    real(wp), dimension(ngpt,neta,npres+1,ntemp), intent(in) :: kmajor
+    real(wp), dimension(ntemp,neta,npres+1,ngpt), intent(in) :: kmajor
 
     ! inputs from profile or parent function
-    real(wp),    dimension(2,    nflav,ncol,nlay), intent(in) :: col_mix
-    real(wp),    dimension(2,2,2,nflav,ncol,nlay), intent(in) :: fmajor
-    integer,     dimension(2,    nflav,ncol,nlay), intent(in) :: jeta
+    real(wp),    dimension(2,    ncol,nlay,nflav), intent(in) :: col_mix
+    real(wp),    dimension(2,2,2,ncol,nlay,nflav), intent(in) :: fmajor
+    integer,     dimension(2,    ncol,nlay,nflav), intent(in) :: jeta
     logical(wl), dimension(ncol,nlay), intent(in) :: tropo
     integer,     dimension(ncol,nlay), intent(in) :: jtemp, jpress
 
     ! outputs
-    real(wp), dimension(ngpt,nlay,ncol), intent(inout) :: tau
+    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau
     ! -----------------
     ! local variables
     real(wp) :: tau_major ! major species optical depth
@@ -365,11 +365,12 @@ subroutine gas_optical_depths_major(ncol,nlay,nbnd,ngpt,&
     ! -----------------
 
     ! optical depth calculation for major species
-    !$acc parallel loop collapse(3)
-    !$omp target teams distribute parallel do simd collapse(3)
+    !$acc parallel loop collapse(2)
+    !$omp target teams distribute parallel do simd collapse(2)
     do ilay = 1, nlay
       do icol = 1, ncol
-        ! optical depth calculation for major species
+
+        !$acc loop seq
         do igpt = 1, ngpt
           ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
           itropo = merge(1,2,tropo(icol,ilay))  ! WS: moved inside innermost loop
@@ -378,11 +379,12 @@ subroutine gas_optical_depths_major(ncol,nlay,nbnd,ngpt,&
           iflav = gpoint_flavor(itropo, igpt)
           tau_major = &
             ! interpolation in temperature, pressure, and eta
-            interpolate3D(col_mix(:,iflav,icol,ilay), &
-                          fmajor(:,:,:,iflav,icol,ilay), kmajor, &
-                          igpt, jeta(:,iflav,icol,ilay), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
-          tau(igpt,ilay,icol) = tau(igpt,ilay,icol) + tau_major
+            interpolate3D(col_mix(:,icol,ilay,iflav), &
+                          fmajor(:,:,:,icol,ilay,iflav), kmajor, &
+                          igpt, jeta(:,icol,ilay,iflav), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
+          tau(icol,ilay,igpt) = tau(icol,ilay,igpt) + tau_major
         end do ! igpt
+
       end do
     end do ! ilay
   end subroutine gas_optical_depths_major
@@ -411,7 +413,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt,        &
     integer,                                     intent(in   ) :: ntemp,neta,nminor,nminork
     integer,                                     intent(in   ) :: idx_h2o, idx_tropo
     integer,     dimension(2, ngpt),             intent(in   ) :: gpt_flv
-    real(wp),    dimension(nminork,neta,ntemp),  intent(in   ) :: kminor
+    real(wp),    dimension(ntemp,neta,nminork),  intent(in   ) :: kminor
     integer,     dimension(2,nminor),            intent(in   ) :: minor_limits_gpt
     logical(wl), dimension(  nminor),            intent(in   ) :: minor_scales_with_density
     logical(wl), dimension(  nminor),            intent(in   ) :: scale_by_complement
@@ -419,47 +421,48 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt,        &
     integer,     dimension(  nminor),            intent(in   ) :: idx_minor, idx_minor_scaling
     real(wp),    dimension(ncol,nlay),           intent(in   ) :: play, tlay
     real(wp),    dimension(ncol,nlay,0:ngas),    intent(in   ) :: col_gas
-    real(wp),    dimension(2,2,nflav,ncol,nlay), intent(in   ) :: fminor
-    integer,     dimension(2,  nflav,ncol,nlay), intent(in   ) :: jeta
+    real(wp),    dimension(2,2,ncol,nlay,nflav), intent(in   ) :: fminor
+    integer,     dimension(2,  ncol,nlay,nflav), intent(in   ) :: jeta
     integer,     dimension(ncol, 2),             intent(in   ) :: layer_limits
     integer,     dimension(ncol,nlay),           intent(in   ) :: jtemp
-    real(wp),    dimension(ngpt,nlay,ncol),      intent(inout) :: tau
+    real(wp),    dimension(ncol,nlay,ngpt),      intent(inout) :: tau
     ! -----------------
     ! local variables
     real(wp), parameter :: PaTohPa = 0.01_wp
     real(wp) :: vmr_fact, dry_fact             ! conversion from column abundance to dry vol. mixing ratio;
     real(wp) :: scaling, kminor_loc, tau_minor ! minor species absorption coefficient, optical depth
     integer  :: icol, ilay, iflav, igpt, imnr
-    integer  :: gptS, gptE
     integer  :: minor_start, minor_loc, extent
 
     real(wp) :: myplay, mytlay, mycol_gas_h2o, mycol_gas_imnr, mycol_gas_0
     real(wp) :: myfminor(2,2)
-    integer  :: myjtemp, myjeta(2), max_gpt_diff, igpt0
+    integer  :: myjtemp, myjeta(2)
     ! -----------------
 
     extent = size(scale_by_complement,dim=1)
 
-    ! Find the largest number of g-points per band
-    max_gpt_diff = maxval( minor_limits_gpt(2,:) - minor_limits_gpt(1,:) )
-
-    !$acc parallel loop gang vector collapse(3)
-    !$omp target teams distribute parallel do simd collapse(3)
+    !$acc parallel loop gang vector collapse(2)
+    !$omp target teams distribute parallel do simd collapse(2)
     do ilay = 1 , nlay
       do icol = 1, ncol
-        do igpt0 = 0, max_gpt_diff
-          !
-          ! This check skips individual columns with no pressures in range
-          !
-          if ( layer_limits(icol,1) <= 0 .or. ilay < layer_limits(icol,1) .or. ilay > layer_limits(icol,2) ) cycle
+        !
+        ! This check skips individual columns with no pressures in range
+        !
+        if ( layer_limits(icol,1) <= 0 .or. ilay < layer_limits(icol,1) .or. ilay > layer_limits(icol,2) ) cycle
 
-          myplay  = play (icol,ilay)
-          mytlay  = tlay (icol,ilay)
-          myjtemp = jtemp(icol,ilay)
-          mycol_gas_h2o = col_gas(icol,ilay,idx_h2o)
-          mycol_gas_0   = col_gas(icol,ilay,0)
+        myplay  = play (icol,ilay)
+        mytlay  = tlay (icol,ilay)
+        myjtemp = jtemp(icol,ilay)
+        mycol_gas_h2o = col_gas(icol,ilay,idx_h2o)
+        mycol_gas_0   = col_gas(icol,ilay,0)
 
-          do imnr = 1, extent
+        !$acc loop seq
+        do imnr = 1, extent
+          ! What is the starting point in the stored array of minor absorption coefficients?
+          minor_start = kminor_start(imnr)
+
+          !$acc loop seq
+          do igpt = minor_limits_gpt(1,imnr), minor_limits_gpt(2,imnr)
 
             scaling = col_gas(icol,ilay,idx_minor(imnr))
             if (minor_scales_with_density(imnr)) then
@@ -484,33 +487,16 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt,        &
             !
             ! Interpolation of absorption coefficient and calculation of optical depth
             !
-            ! Which gpoint range does this minor gas affect?
-            gptS = minor_limits_gpt(1,imnr)
-            gptE = minor_limits_gpt(2,imnr)
-
-            ! Find the actual g-point to work on
-            igpt = igpt0 + gptS
-
-            ! Proceed only if the g-point is within the correct range
-            if (igpt <= gptE) then
-              ! What is the starting point in the stored array of minor absorption coefficients?
-              minor_start = kminor_start(imnr)
-
-              tau_minor = 0._wp
-              iflav = gpt_flv(idx_tropo,igpt) ! eta interpolation depends on flavor
-              minor_loc = minor_start + (igpt - gptS) ! add offset to starting point
-              kminor_loc = interpolate2D(fminor(:,:,iflav,icol,ilay), kminor, minor_loc, &
-                                          jeta(:,iflav,icol,ilay), myjtemp)
-              tau_minor = kminor_loc * scaling
-
-              !$acc atomic update
-              !$omp atomic update
-              tau(igpt,ilay,icol) = tau(igpt,ilay,icol) + tau_minor
-            endif
-
+            tau_minor = 0._wp
+            iflav = gpt_flv(idx_tropo,igpt) ! eta interpolation depends on flavor
+            minor_loc = minor_start + (igpt - minor_limits_gpt(1,imnr)) ! add offset to starting point
+            kminor_loc = interpolate2D(fminor(:,:,icol,ilay,iflav), kminor, minor_loc, &
+                                        jeta(:,icol,ilay,iflav), myjtemp)
+            tau_minor = kminor_loc * scaling
+            tau(icol,ilay,igpt) = tau(icol,ilay,igpt) + tau_minor
           enddo
-
         enddo
+
       enddo
     enddo
 
@@ -530,16 +516,16 @@ subroutine compute_tau_rayleigh(ncol,nlay,nbnd,ngpt,         &
     integer,                                     intent(in ) :: ngas,nflav,neta,npres,ntemp
     integer,     dimension(2,ngpt),              intent(in ) :: gpoint_flavor
     integer,     dimension(2,nbnd),              intent(in ) :: band_lims_gpt ! start and end g-point for each band
-    real(wp),    dimension(ngpt,neta,ntemp,2),   intent(in ) :: krayl
+    real(wp),    dimension(ntemp,neta,ngpt,2),   intent(in ) :: krayl
     integer,                                     intent(in ) :: idx_h2o
     real(wp),    dimension(ncol,nlay),           intent(in ) :: col_dry
     real(wp),    dimension(ncol,nlay,0:ngas),    intent(in ) :: col_gas
-    real(wp),    dimension(2,2,nflav,ncol,nlay), intent(in ) :: fminor
-    integer,     dimension(2,  nflav,ncol,nlay), intent(in ) :: jeta
+    real(wp),    dimension(2,2,ncol,nlay,nflav), intent(in ) :: fminor
+    integer,     dimension(2,  ncol,nlay,nflav), intent(in ) :: jeta
     logical(wl), dimension(ncol,nlay),           intent(in ) :: tropo
     integer,     dimension(ncol,nlay),           intent(in ) :: jtemp
     ! outputs
-    real(wp),    dimension(ngpt,nlay,ncol),      intent(out) :: tau_rayleigh
+    real(wp),    dimension(ncol,nlay,ngpt),      intent(out) :: tau_rayleigh
     ! -----------------
     ! local variables
     real(wp) :: k ! rayleigh scattering coefficient
@@ -547,17 +533,18 @@ subroutine compute_tau_rayleigh(ncol,nlay,nbnd,ngpt,         &
     integer  :: itropo
     ! -----------------
 
-    !$acc parallel loop collapse(3)
-    !$omp target teams distribute parallel do simd collapse(3)
+    !$acc parallel loop collapse(2)
+    !$omp target teams distribute parallel do simd collapse(2)
     do ilay = 1, nlay
       do icol = 1, ncol
+        !$acc loop seq
         do igpt = 1, ngpt
           itropo = merge(1,2,tropo(icol,ilay)) ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
           iflav = gpoint_flavor(itropo, igpt)
-          k = interpolate2D(fminor(:,:,iflav,icol,ilay), &
+          k = interpolate2D(fminor(:,:,icol,ilay,iflav), &
                             krayl(:,:,:,itropo),      &
-                            igpt, jeta(:,iflav,icol,ilay), jtemp(icol,ilay))
-          tau_rayleigh(igpt,ilay,icol) =  k * (col_gas(icol,ilay,idx_h2o)+col_dry(icol,ilay))
+                            igpt, jeta(:,icol,ilay,iflav), jtemp(icol,ilay))
+          tau_rayleigh(icol,ilay,igpt) =  k * (col_gas(icol,ilay,idx_h2o)+col_dry(icol,ilay))
         end do
       end do
     end do
@@ -579,23 +566,22 @@ subroutine compute_Planck_source(                        &
     real(wp),    dimension(ncol       ),        intent(in) :: tsfc
     integer,                                    intent(in) :: sfc_lay
     ! Interpolation variables
-    real(wp),    dimension(2,2,2,nflav,ncol,nlay), intent(in) :: fmajor
-    integer,     dimension(2,    nflav,ncol,nlay), intent(in) :: jeta
+    real(wp),    dimension(2,2,2,ncol,nlay,nflav), intent(in) :: fmajor
+    integer,     dimension(2,    ncol,nlay,nflav), intent(in) :: jeta
     logical(wl), dimension(            ncol,nlay), intent(in) :: tropo
     integer,     dimension(            ncol,nlay), intent(in) :: jtemp, jpress
     ! Table-specific
     integer, dimension(ngpt),                     intent(in) :: gpoint_bands ! start and end g-point for each band
     integer, dimension(2, nbnd),                  intent(in) :: band_lims_gpt ! start and end g-point for each band
     real(wp),                                     intent(in) :: temp_ref_min, totplnk_delta
-    real(wp), dimension(ngpt,neta,npres+1,ntemp), intent(in) :: pfracin
+    real(wp), dimension(ntemp,neta,npres+1,ngpt), intent(in) :: pfracin
     real(wp), dimension(nPlanckTemp,nbnd),        intent(in) :: totplnk
     integer,  dimension(2,ngpt),                  intent(in) :: gpoint_flavor
 
-    real(wp), dimension(ngpt,     ncol), intent(out) :: sfc_src
-    real(wp), dimension(ngpt,nlay,ncol), intent(out) :: lay_src
-    real(wp), dimension(ngpt,nlay,ncol), intent(out) :: lev_src_inc, lev_src_dec
-
-    real(wp), dimension(ngpt,     ncol), intent(out) :: sfc_source_Jac
+    real(wp), dimension(ncol,     ngpt), intent(out) :: sfc_src
+    real(wp), dimension(ncol,nlay,ngpt), intent(out) :: lay_src
+    real(wp), dimension(ncol,nlay,ngpt), intent(out) :: lev_src_inc, lev_src_dec
+    real(wp), dimension(ncol,     ngpt), intent(out) :: sfc_source_Jac
     ! -----------------
     ! local
     real(wp), parameter                             :: delta_Tsurf = 1.0_wp
@@ -603,145 +589,72 @@ subroutine compute_Planck_source(                        &
     integer  :: ilay, icol, igpt, ibnd, itropo, iflav
     integer  :: gptS, gptE
     real(wp), dimension(2), parameter :: one = [1._wp, 1._wp]
-    real(wp) :: pfrac          (ngpt,nlay,  ncol)
-    real(wp) :: planck_function(nbnd,nlay+1,ncol)
+    real(wp) :: pfrac
+    real(wp) :: planck_function_1, planck_function_2
     ! -----------------
 
-    !$acc enter data copyin(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor)
-    !$omp target enter data map(to:tlay, tlev, tsfc, fmajor, jeta, tropo, jtemp, jpress, gpoint_bands, pfracin, totplnk, gpoint_flavor)
-    !$acc enter data create(sfc_src,lay_src,lev_src_inc,lev_src_dec)
-    !$omp target enter data map(alloc:sfc_src, lay_src, lev_src_inc, lev_src_dec)
-    !$acc enter data create(pfrac,planck_function)
-    !$omp target enter data map(alloc:pfrac, planck_function)
-    !$acc enter data create(sfc_source_Jac)
-    !$omp target enter data map(alloc:sfc_source_Jac)
+    !$acc        data copyin(   tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor) &
+    !$acc             copyout(  sfc_src,lay_src,lev_src_inc,lev_src_dec,sfc_source_Jac)
+    !$omp target data map(   to:tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor) &
+    !$omp             map(from: sfc_src,lay_src,lev_src_inc,lev_src_dec,sfc_source_Jac) 
 
     ! Calculation of fraction of band's Planck irradiance associated with each g-point
-    !$acc parallel loop collapse(3)
-    !$omp target teams distribute parallel do simd collapse(3)
-    do icol = 1, ncol
-      do ilay = 1, nlay
+    !$acc parallel loop tile(128,2)
+    !$omp target teams distribute parallel do simd collapse(2)
+    do ilay = 1, nlay
+      do icol = 1, ncol
+
+        !$acc loop seq
         do igpt = 1, ngpt
+          ibnd = gpoint_bands(igpt)
           ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
           itropo = merge(1,2,tropo(icol,ilay))  !WS moved itropo inside loop for GPU
           iflav = gpoint_flavor(itropo, igpt) !eta interpolation depends on band's flavor
-          pfrac(igpt,ilay,icol) = &
+          pfrac = &
             ! interpolation in temperature, pressure, and eta
-            interpolate3D(one, fmajor(:,:,:,iflav,icol,ilay), pfracin, &
-                          igpt, jeta(:,iflav,icol,ilay), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
-        end do ! igpt
-      end do   ! layer
-    end do     ! column
-
-    !
-    ! Planck function by band for the surface
-    ! Compute surface source irradiance for g-point, equals band irradiance x fraction for g-point
-    !
-    !$acc parallel loop
-    !$omp target teams distribute parallel do simd
-    do icol = 1, ncol
-      call interpolate1D(tsfc(icol)              , temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,1,icol))
-      call interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,2,icol))
-    end do
-    !
-    ! Map to g-points
-    !
-    !$acc parallel loop collapse(2)
-    !$omp target teams distribute parallel do simd collapse(2)
-    do igpt = 1, ngpt
-      do icol = 1, ncol
-        sfc_src       (igpt,icol) = pfrac(igpt,sfc_lay,icol) * planck_function(gpoint_bands(igpt),1,icol)
-        sfc_source_Jac(igpt,icol) = pfrac(igpt,sfc_lay,icol) * &
-                 (planck_function(gpoint_bands(igpt),2,icol) - planck_function(gpoint_bands(igpt),1,icol))
-      end do
-    end do ! icol
-
-    !$acc parallel loop collapse(2)
-    !$omp target teams distribute parallel do simd collapse(2)
-    do icol = 1, ncol
-      do ilay = 1, nlay
-        ! Compute layer source irradiance for g-point, equals band irradiance x fraction for g-point
-        call interpolate1D(tlay(icol,ilay), temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,ilay,icol))
-      end do
-    end do
-    !
-    ! Map to g-points
-    !
-    ! Explicitly unroll a time-consuming loop here to increase instruction-level parallelism on a GPU
-    ! Helps to achieve higher bandwidth
-    !
-    !$acc parallel loop present(planck_function) collapse(3)
-    !$omp target teams distribute parallel do simd collapse(3)
-    do icol = 1, ncol, 2
-      do ilay = 1, nlay
-        do igpt = 1, ngpt
-          lay_src(igpt,ilay,icol  ) = pfrac(igpt,ilay,icol  ) * planck_function(gpoint_bands(igpt),ilay,icol)
-          if (icol < ncol) &
-          lay_src(igpt,ilay,icol+1) = pfrac(igpt,ilay,icol+1) * planck_function(gpoint_bands(igpt),ilay,icol+1)
-        end do
-      end do ! ilay
-    end do ! icol
-
-    ! compute level source irradiances for each g-point, one each for upward and downward paths
-    !$acc parallel loop
-    !$omp target teams distribute parallel do simd
-    do icol = 1, ncol
-      call interpolate1D(tlev(icol,     1), temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,       1,icol))
-    end do
-
-    !$acc parallel loop collapse(2)
-    !$omp target teams distribute parallel do simd collapse(2)
-    do icol = 1, ncol
-      do ilay = 2, nlay+1
-        call interpolate1D(tlev(icol,ilay), temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,ilay,icol))
-      end do
-    end do
-
-    !
-    ! Map to g-points
-    !
-    ! Same unrolling as mentioned before
-    !
-    !$acc parallel loop present(planck_function) collapse(3)
-    !$omp target teams distribute parallel do simd collapse(3)
-    do icol = 1, ncol, 2
-      do ilay = 1, nlay
-        do igpt = 1, ngpt
-          lev_src_dec(igpt,ilay,icol  ) = pfrac(igpt,ilay,icol  ) * planck_function(gpoint_bands(igpt),ilay,  icol  )
-          lev_src_inc(igpt,ilay,icol  ) = pfrac(igpt,ilay,icol  ) * planck_function(gpoint_bands(igpt),ilay+1,icol  )
-          if (icol < ncol) then
-          lev_src_dec(igpt,ilay,icol+1) = pfrac(igpt,ilay,icol+1) * planck_function(gpoint_bands(igpt),ilay,  icol+1)
-          lev_src_inc(igpt,ilay,icol+1) = pfrac(igpt,ilay,icol+1) * planck_function(gpoint_bands(igpt),ilay+1,icol+1)
+            interpolate3D(one, fmajor(:,:,:,icol,ilay,iflav), pfracin, &
+                          igpt, jeta(:,icol,ilay,iflav), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
+       
+          ! Compute layer source irradiance for g-point, equals band irradiance x fraction for g-point
+          planck_function_1 = interpolate1D(tlay(icol,ilay), temp_ref_min, totplnk_delta, totplnk(:,ibnd))
+          lay_src(icol,ilay,igpt) = pfrac * planck_function_1
+        
+          ! Compute layer source irradiance for g-point, equals band irradiance x fraction for g-point
+          planck_function_1 = interpolate1D(tlev(icol,ilay),   temp_ref_min, totplnk_delta, totplnk(:,ibnd))
+          planck_function_2 = interpolate1D(tlev(icol,ilay+1), temp_ref_min, totplnk_delta, totplnk(:,ibnd))
+          lev_src_dec(icol,ilay,igpt) = pfrac * planck_function_1
+          lev_src_inc(icol,ilay,igpt) = pfrac * planck_function_2
+
+          if (ilay == sfc_lay) then
+            planck_function_1 = interpolate1D(tsfc(icol)              , temp_ref_min, totplnk_delta, totplnk(:,ibnd))
+            planck_function_2 = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk(:,ibnd))
+    
+            sfc_src       (icol,igpt) = pfrac * planck_function_1
+            sfc_source_Jac(icol,igpt) = pfrac * (planck_function_2 - planck_function_1)
           end if
-        end do
-      end do ! ilay
-    end do ! icol
+        end do ! igpt
 
-    !$acc exit data delete(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor)
-    !$omp target exit data map(release:tlay, tlev, tsfc, fmajor, jeta, tropo, jtemp, jpress, gpoint_bands, pfracin, totplnk, gpoint_flavor)
-    !$acc exit data delete(pfrac,planck_function)
-    !$omp target exit data map(release:pfrac, planck_function)
-    !$acc exit data copyout(sfc_src,lay_src,lev_src_inc,lev_src_dec)
-    !$omp target exit data map(from:sfc_src, lay_src, lev_src_inc, lev_src_dec)
-    !$acc exit data copyout(sfc_source_Jac)
-    !$omp target exit data map(from:sfc_source_Jac)
+      end do ! icol
+    end do ! ilay
 
+    !$acc end        data
+    !$omp end target data
   end subroutine compute_Planck_source
   ! ----------------------------------------------------------
   !
-  ! One dimensional interpolation -- return all values along second table dimension
+  ! One dimensional interpolation
   !
-  subroutine interpolate1D(val, offset, delta, table, res)
-  !$acc routine seq
-  !$omp declare target
+  function interpolate1D(val, offset, delta, table) result(res)
+    !$acc routine seq
+    !$omp declare target
     ! input
     real(wp), intent(in) :: val,    & ! axis value at which to evaluate table
                             offset, & ! minimum of table axis
                             delta     ! step size of table axis
-    real(wp), dimension(:,:), &
+    real(wp), dimension(:), &
               intent(in) :: table ! dimensions (axis, values)
     ! output
-    real(wp), intent(out) ,dimension(size(table,dim=2)) :: res
+    real(wp)             :: res
 
     ! local
     real(wp) :: val0 ! fraction index adjusted by offset and delta
@@ -751,8 +664,8 @@ subroutine interpolate1D(val, offset, delta, table, res)
     val0 = (val - offset) / delta
     frac = val0 - int(val0) ! get fractional part
     index = min(size(table,dim=1)-1, max(1, int(val0)+1)) ! limit the index range
-    res(:) = table(index,:) + frac * (table(index+1,:) - table(index,:))
-  end subroutine interpolate1D
+    res = table(index) + frac * (table(index+1) - table(index))
+  end function interpolate1D
   ! ------------
   !   This function returns a single value from a subset (in gpoint) of the k table
   !
@@ -768,10 +681,10 @@ function interpolate2D(fminor, k, igpt, jeta, jtemp) result(res)
     real(wp)                             :: res ! the result
 
     res =  &
-      fminor(1,1) * k(igpt, jeta(1)  , jtemp  ) + &
-      fminor(2,1) * k(igpt, jeta(1)+1, jtemp  ) + &
-      fminor(1,2) * k(igpt, jeta(2)  , jtemp+1) + &
-      fminor(2,2) * k(igpt, jeta(2)+1, jtemp+1)
+      fminor(1,1) * k(jtemp  , jeta(1)  , igpt) + &
+      fminor(2,1) * k(jtemp  , jeta(1)+1, igpt) + &
+      fminor(1,2) * k(jtemp+1, jeta(2)  , igpt) + &
+      fminor(2,2) * k(jtemp+1, jeta(2)+1, igpt)
   end function interpolate2D
 
   ! ----------------------------------------------------------
@@ -793,109 +706,16 @@ function interpolate3D(scaling, fmajor, k, igpt, jeta, jtemp, jpress) result(res
     ! each code block is for a different reference temperature
     res =  &
       scaling(1) * &
-      ( fmajor(1,1,1) * k(igpt, jeta(1)  , jpress-1, jtemp  ) + &
-        fmajor(2,1,1) * k(igpt, jeta(1)+1, jpress-1, jtemp  ) + &
-        fmajor(1,2,1) * k(igpt, jeta(1)  , jpress  , jtemp  ) + &
-        fmajor(2,2,1) * k(igpt, jeta(1)+1, jpress  , jtemp  ) ) + &
+      ( fmajor(1,1,1) * k(jtemp, jeta(1)  , jpress-1, igpt  ) + &
+        fmajor(2,1,1) * k(jtemp, jeta(1)+1, jpress-1, igpt  ) + &
+        fmajor(1,2,1) * k(jtemp, jeta(1)  , jpress  , igpt  ) + &
+        fmajor(2,2,1) * k(jtemp, jeta(1)+1, jpress  , igpt  ) ) + &
       scaling(2) * &
-      ( fmajor(1,1,2) * k(igpt, jeta(2)  , jpress-1, jtemp+1) + &
-        fmajor(2,1,2) * k(igpt, jeta(2)+1, jpress-1, jtemp+1) + &
-        fmajor(1,2,2) * k(igpt, jeta(2)  , jpress  , jtemp+1) + &
-        fmajor(2,2,2) * k(igpt, jeta(2)+1, jpress  , jtemp+1) )
+      ( fmajor(1,1,2) * k(jtemp+1, jeta(2)  , jpress-1, igpt) + &
+        fmajor(2,1,2) * k(jtemp+1, jeta(2)+1, jpress-1, igpt) + &
+        fmajor(1,2,2) * k(jtemp+1, jeta(2)  , jpress  , igpt) + &
+        fmajor(2,2,2) * k(jtemp+1, jeta(2)+1, jpress  , igpt) )
   end function interpolate3D
-
-  ! ----------------------------------------------------------
-  !
-  ! Combine absoprtion and Rayleigh optical depths for total tau, ssa, g
-  !
-  subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau, ssa, g) &
-      bind(C, name="combine_and_reorder_2str")
-    integer,                             intent(in) :: ncol, nlay, ngpt
-    real(wp), dimension(ngpt,nlay,ncol), intent(in   ) :: tau_abs, tau_rayleigh
-    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau, ssa, g ! inout because components are allocated
-    ! -----------------------
-    integer  :: icol, ilay, igpt,  icol0, igpt0, icdiff, igdiff
-    real(wp) :: t
-    integer, parameter :: tile = 32
-    ! -----------------------
-    !$acc data copy(tau, ssa, g)                 &
-    !$acc      copyin(tau_rayleigh, tau_abs)
-
-    call zero_array(ncol, nlay, ngpt, g)
-    ! We are using blocking memory accesses here to improve performance
-    !  of the transpositions. See also comments in mo_rrtmgp_util_reorder_kernels.F90
-    !
-    !$acc parallel default(none) vector_length(tile*tile)
-    !$acc loop gang collapse(3)
-    !$omp target teams distribute parallel do simd collapse(3) &
-    !$omp& map(tofrom:tau, ssa, g) map(to:tau_rayleigh, tau_abs)
-    do ilay = 1, nlay
-      do icol0 = 1, ncol, tile
-        do igpt0 = 1, ngpt, tile
-
-          !$acc loop vector collapse(2)
-          do igdiff = 0, tile-1
-            do icdiff = 0, tile-1
-              icol = icol0 + icdiff
-              igpt = igpt0 + igdiff
-              if (icol > ncol .or. igpt > ngpt) cycle
-               t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol)
-               tau(icol,ilay,igpt) = t
-               if(t > 2._wp * tiny(t)) then
-                 ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t
-               else
-                 ssa(icol,ilay,igpt) = 0._wp
-               end if
-            end do
-          end do
-
-        end do
-      end do
-    end do
-    !$acc end parallel
-    !$acc end data
-  end subroutine combine_and_reorder_2str
-  ! ----------------------------------------------------------
-  !
-  ! Combine absoprtion and Rayleigh optical depths for total tau, ssa, p
-  !   using Rayleigh scattering phase function
-  !
-  subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_rayleigh, tau, ssa, p) &
-      bind(C, name="combine_and_reorder_nstr")
-    integer, intent(in) :: ncol, nlay, ngpt, nmom
-    real(wp), dimension(ngpt,nlay,ncol), intent(in ) :: tau_abs, tau_rayleigh
-    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau, ssa
-    real(wp), dimension(ncol,nlay,ngpt,nmom), &
-                                         intent(inout) :: p
-    ! -----------------------
-    integer :: icol, ilay, igpt, imom
-    real(wp) :: t
-    ! -----------------------
-    !$acc parallel loop collapse(3) &
-    !$acc&     copy(tau, ssa, p) &
-    !$acc&     copyin(tau_rayleigh(:ngpt,:nlay,:ncol),tau_abs(:ngpt,:nlay,:ncol))
-    !$omp target teams distribute parallel do simd collapse(3) &
-    !$omp& map(tofrom:tau, ssa, p) &
-    !$omp& map(to:tau_rayleigh(:ngpt, :nlay, :ncol), tau_abs(:ngpt, :nlay, :ncol))
-    do icol = 1, ncol
-      do ilay = 1, nlay
-        do igpt = 1, ngpt
-          t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol)
-          tau(icol,ilay,igpt) = t
-          if(t > 2._wp * tiny(t)) then
-            ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t
-          else
-            ssa(icol,ilay,igpt) = 0._wp
-          end if
-          do imom = 1, nmom
-            p(imom,icol,ilay,igpt) = 0.0_wp
-          end do
-          if(nmom >= 2) p(2,icol,ilay,igpt) = 0.1_wp
-        end do
-      end do
-    end do
-  end subroutine combine_and_reorder_nstr
-
   ! ----------------------------------------------------------
   !
   ! In-house subroutine for handling minloc and maxloc for
diff --git a/rrtmgp/kernels/mo_gas_optics_kernels.F90 b/rrtmgp/kernels/mo_gas_optics_kernels.F90
index 4cf77dbcd..3eb17cbd5 100644
--- a/rrtmgp/kernels/mo_gas_optics_kernels.F90
+++ b/rrtmgp/kernels/mo_gas_optics_kernels.F90
@@ -50,10 +50,10 @@ subroutine interpolation( &
     ! outputs
     integer,     dimension(ncol,nlay), intent(out) :: jtemp, jpress
     logical(wl), dimension(ncol,nlay), intent(out) :: tropo
-    integer,     dimension(2,    nflav,ncol,nlay), intent(out) :: jeta
-    real(wp),    dimension(2,    nflav,ncol,nlay), intent(out) :: col_mix
-    real(wp),    dimension(2,2,2,nflav,ncol,nlay), intent(out) :: fmajor
-    real(wp),    dimension(2,2,  nflav,ncol,nlay), intent(out) :: fminor
+    integer,     dimension(2,    ncol,nlay,nflav), intent(out) :: jeta
+    real(wp),    dimension(2,    ncol,nlay,nflav), intent(out) :: col_mix
+    real(wp),    dimension(2,2,2,ncol,nlay,nflav), intent(out) :: fmajor
+    real(wp),    dimension(2,2,  ncol,nlay,nflav), intent(out) :: fminor
     ! -----------------
     ! local
     real(wp), dimension(ncol,nlay) :: ftemp, fpress ! interpolation fraction for temperature, pressure
@@ -84,39 +84,39 @@ subroutine interpolation( &
       end do
     end do
 
-    do ilay = 1, nlay
-      do icol = 1, ncol
+    do iflav = 1, nflav
+      igases(:) = flavor(:,iflav)
+      do ilay = 1, nlay
+        do icol = 1, ncol
         ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
         itropo = merge(1,2,tropo(icol,ilay))
         ! loop over implemented combinations of major species
-        do iflav = 1, nflav
-          igases(:) = flavor(:,iflav)
           do itemp = 1, 2
             ! compute interpolation fractions needed for lower, then upper reference temperature level
             ! compute binary species parameter (eta) for flavor and temperature and
             !  associated interpolation index and factors
             ratio_eta_half = vmr_ref(itropo,igases(1),(jtemp(icol,ilay)+itemp-1)) / &
                              vmr_ref(itropo,igases(2),(jtemp(icol,ilay)+itemp-1))
-            col_mix(itemp,iflav,icol,ilay) = col_gas(icol,ilay,igases(1)) + ratio_eta_half * col_gas(icol,ilay,igases(2))
-            eta = merge(col_gas(icol,ilay,igases(1)) / col_mix(itemp,iflav,icol,ilay), 0.5_wp, &
-                        col_mix(itemp,iflav,icol,ilay) > 2._wp * tiny(col_mix))
+            col_mix(itemp,icol,ilay,iflav) = col_gas(icol,ilay,igases(1)) + ratio_eta_half * col_gas(icol,ilay,igases(2))
+            eta = merge(col_gas(icol,ilay,igases(1)) / col_mix(itemp,icol,ilay,iflav), 0.5_wp, &
+                        col_mix(itemp,icol,ilay,iflav) > 2._wp * tiny(col_mix))
             loceta = eta * float(neta-1)
-            jeta(itemp,iflav,icol,ilay) = min(int(loceta)+1, neta-1)
+            jeta(itemp,icol,ilay,iflav) = min(int(loceta)+1, neta-1)
             feta = mod(loceta, 1.0_wp)
             ! compute interpolation fractions needed for minor species
             ! ftemp_term = (1._wp-ftemp(icol,ilay)) for itemp = 1, ftemp(icol,ilay) for itemp=2
             ftemp_term = (real(2-itemp, wp) + real(2*itemp-3, wp) * ftemp(icol,ilay))
-            fminor(1,itemp,iflav,icol,ilay) = (1._wp-feta) * ftemp_term
-            fminor(2,itemp,iflav,icol,ilay) =        feta  * ftemp_term
+            fminor(1,itemp,icol,ilay,iflav) = (1._wp-feta) * ftemp_term
+            fminor(2,itemp,icol,ilay,iflav) =        feta  * ftemp_term
             ! compute interpolation fractions needed for major species
-            fmajor(1,1,itemp,iflav,icol,ilay) = (1._wp-fpress(icol,ilay)) * fminor(1,itemp,iflav,icol,ilay)
-            fmajor(2,1,itemp,iflav,icol,ilay) = (1._wp-fpress(icol,ilay)) * fminor(2,itemp,iflav,icol,ilay)
-            fmajor(1,2,itemp,iflav,icol,ilay) =        fpress(icol,ilay)  * fminor(1,itemp,iflav,icol,ilay)
-            fmajor(2,2,itemp,iflav,icol,ilay) =        fpress(icol,ilay)  * fminor(2,itemp,iflav,icol,ilay)
+            fmajor(1,1,itemp,icol,ilay,iflav) = (1._wp-fpress(icol,ilay)) * fminor(1,itemp,icol,ilay,iflav)
+            fmajor(2,1,itemp,icol,ilay,iflav) = (1._wp-fpress(icol,ilay)) * fminor(2,itemp,icol,ilay,iflav)
+            fmajor(1,2,itemp,icol,ilay,iflav) =        fpress(icol,ilay)  * fminor(1,itemp,icol,ilay,iflav)
+            fmajor(2,2,itemp,icol,ilay,iflav) =        fpress(icol,ilay)  * fminor(2,itemp,icol,ilay,iflav)
           end do ! reference temperatures
-        end do ! iflav
-      end do ! icol,ilay
-    end do
+        end do ! icol
+      end do ! ilay
+    end do ! iflav
 
   end subroutine interpolation
   ! --------------------------------------------------------------------------------------
@@ -162,9 +162,9 @@ subroutine compute_tau_absorption(                &
     ! inputs from object
     integer,     dimension(2,ngpt),                  intent(in) :: gpoint_flavor
     integer,     dimension(2,nbnd),                  intent(in) :: band_lims_gpt
-    real(wp),    dimension(ngpt,neta,npres+1,ntemp), intent(in) :: kmajor
-    real(wp),    dimension(nminorklower,neta,ntemp), intent(in) :: kminor_lower
-    real(wp),    dimension(nminorkupper,neta,ntemp), intent(in) :: kminor_upper
+    real(wp),    dimension(ntemp,neta,npres+1,ngpt), intent(in) :: kmajor
+    real(wp),    dimension(ntemp,neta,nminorklower), intent(in) :: kminor_lower
+    real(wp),    dimension(ntemp,neta,nminorkupper), intent(in) :: kminor_upper
     integer,     dimension(2,nminorlower),           intent(in) :: minor_limits_gpt_lower
     integer,     dimension(2,nminorupper),           intent(in) :: minor_limits_gpt_upper
     logical(wl), dimension(  nminorlower),           intent(in) :: minor_scales_with_density_lower
@@ -180,17 +180,17 @@ subroutine compute_tau_absorption(                &
     logical(wl), dimension(ncol,nlay),               intent(in) :: tropo
     ! ---------------------
     ! inputs from profile or parent function
-    real(wp), dimension(2,    nflav,ncol,nlay       ), intent(in) :: col_mix
-    real(wp), dimension(2,2,2,nflav,ncol,nlay       ), intent(in) :: fmajor
-    real(wp), dimension(2,2,  nflav,ncol,nlay       ), intent(in) :: fminor
+    real(wp), dimension(2,    ncol,nlay,nflav       ), intent(in) :: col_mix
+    real(wp), dimension(2,2,2,ncol,nlay,nflav       ), intent(in) :: fmajor
+    real(wp), dimension(2,2,  ncol,nlay,nflav       ), intent(in) :: fminor
     real(wp), dimension(            ncol,nlay       ), intent(in) :: play, tlay      ! pressure and temperature
     real(wp), dimension(            ncol,nlay,0:ngas), intent(in) :: col_gas
-    integer,  dimension(2,    nflav,ncol,nlay       ), intent(in) :: jeta
+    integer,  dimension(2,    ncol,nlay,nflav       ), intent(in) :: jeta
     integer,  dimension(            ncol,nlay       ), intent(in) :: jtemp
     integer,  dimension(            ncol,nlay       ), intent(in) :: jpress
     ! ---------------------
     ! output - optical depth
-    real(wp), dimension(ngpt,nlay,ncol), intent(inout) :: tau
+    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau
     ! ---------------------
     ! Local variables
     !
@@ -285,17 +285,17 @@ subroutine gas_optical_depths_major(ncol,nlay,nbnd,ngpt,&
     ! inputs from object
     integer,  dimension(2,ngpt),  intent(in) :: gpoint_flavor
     integer,  dimension(2,nbnd),  intent(in) :: band_lims_gpt ! start and end g-point for each band
-    real(wp), dimension(ngpt,neta,npres+1,ntemp), intent(in) :: kmajor
+    real(wp), dimension(ntemp,neta,npres+1,ngpt), intent(in) :: kmajor
 
     ! inputs from profile or parent function
-    real(wp),    dimension(2,    nflav,ncol,nlay), intent(in) :: col_mix
-    real(wp),    dimension(2,2,2,nflav,ncol,nlay), intent(in) :: fmajor
-    integer,     dimension(2,    nflav,ncol,nlay), intent(in) :: jeta
+    real(wp),    dimension(2,    ncol,nlay,nflav), intent(in) :: col_mix
+    real(wp),    dimension(2,2,2,ncol,nlay,nflav), intent(in) :: fmajor
+    integer,     dimension(2,    ncol,nlay,nflav), intent(in) :: jeta
     logical(wl), dimension(ncol,nlay), intent(in) :: tropo
     integer,     dimension(ncol,nlay), intent(in) :: jtemp, jpress
 
     ! outputs
-    real(wp), dimension(ngpt,nlay,ncol), intent(inout) :: tau
+    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau
     ! -----------------
     ! local variables
     real(wp) :: tau_major(ngpt) ! major species optical depth
@@ -303,27 +303,26 @@ subroutine gas_optical_depths_major(ncol,nlay,nbnd,ngpt,&
     integer :: icol, ilay, iflav, ibnd, itropo
     integer :: gptS, gptE
 
-    ! -----------------
-
-    do icol = 1, ncol
+    ! optical depth calculation for major species
+    do ibnd = 1, nbnd
+      gptS = band_lims_gpt(1, ibnd)
+      gptE = band_lims_gpt(2, ibnd)
       do ilay = 1, nlay
-        ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
-        itropo = merge(1,2,tropo(icol,ilay))
-        ! optical depth calculation for major species
-        do ibnd = 1, nbnd
-          gptS = band_lims_gpt(1, ibnd)
-          gptE = band_lims_gpt(2, ibnd)
+        do icol = 1, ncol
+          ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
+          itropo = merge(1,2,tropo(icol,ilay))
           iflav = gpoint_flavor(itropo, gptS) !eta interpolation depends on band's flavor
           tau_major(gptS:gptE) = &
             ! interpolation in temperature, pressure, and eta
-            interpolate3D_byflav(col_mix(:,iflav,icol,ilay),                                     &
-                                 fmajor(:,:,:,iflav,icol,ilay), kmajor,                          &
+            interpolate3D_byflav(col_mix(:,icol,ilay,iflav),                                     &
+                                 fmajor(:,:,:,icol,ilay,iflav), kmajor,                          &
                                  band_lims_gpt(1, ibnd), band_lims_gpt(2, ibnd),                 &
-                                 jeta(:,iflav,icol,ilay), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
-          tau(gptS:gptE,ilay,icol) = tau(gptS:gptE,ilay,icol) + tau_major(gptS:gptE)
-        end do ! igpt
+                                 jeta(:,icol,ilay,iflav), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
+            tau(icol,ilay,gptS:gptE) = tau(icol,ilay,gptS:gptE) + tau_major(gptS:gptE)
+        end do
       end do
-    end do ! ilay
+    end do
+
   end subroutine gas_optical_depths_major
 
   ! ----------------------------------------------------------
@@ -350,7 +349,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt,        &
     integer,                                     intent(in   ) :: ntemp,neta,nminor,nminork
     integer,                                     intent(in   ) :: idx_h2o
     integer,     dimension(ngpt),                intent(in   ) :: gpt_flv
-    real(wp),    dimension(nminork,neta,ntemp),  intent(in   ) :: kminor
+    real(wp),    dimension(ntemp,neta,nminork),  intent(in   ) :: kminor
     integer,     dimension(2,nminor),            intent(in   ) :: minor_limits_gpt
     logical(wl), dimension(  nminor),            intent(in   ) :: minor_scales_with_density
     logical(wl), dimension(  nminor),            intent(in   ) :: scale_by_complement
@@ -358,11 +357,11 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt,        &
     integer,     dimension(  nminor),            intent(in   ) :: idx_minor, idx_minor_scaling
     real(wp),    dimension(ncol,nlay),           intent(in   ) :: play, tlay
     real(wp),    dimension(ncol,nlay,0:ngas),    intent(in   ) :: col_gas
-    real(wp),    dimension(2,2,nflav,ncol,nlay), intent(in   ) :: fminor
-    integer,     dimension(2,  nflav,ncol,nlay), intent(in   ) :: jeta
+    real(wp),    dimension(2,2,ncol,nlay,nflav), intent(in   ) :: fminor
+    integer,     dimension(2,  ncol,nlay,nflav), intent(in   ) :: jeta
     integer,     dimension(ncol, 2),             intent(in   ) :: layer_limits
     integer,     dimension(ncol,nlay),           intent(in   ) :: jtemp
-    real(wp),    dimension(ngpt,nlay,ncol),      intent(inout) :: tau
+    real(wp),    dimension(ncol,nlay,ngpt),      intent(inout) :: tau
     ! -----------------
     ! local variables
     real(wp), parameter :: PaTohPa = 0.01_wp
@@ -377,6 +376,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt,        &
     !   layers with pressures in the upper or lower atmosphere respectively
     ! First check skips the routine entirely if all columns are out of bounds...
     !
+
     if(any(layer_limits(:,1) > 0)) then
       do imnr = 1, size(scale_by_complement,dim=1) ! loop over minor absorbers in each band
         do icol = 1, ncol
@@ -416,16 +416,17 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt,        &
               gptE = minor_limits_gpt(2,imnr)
               iflav = gpt_flv(gptS)
               tau_minor(gptS:gptE) = scaling *                   &
-                                      interpolate2D_byflav(fminor(:,:,iflav,icol,ilay), &
+                                      interpolate2D_byflav(fminor(:,:,icol,ilay,iflav), &
                                                            kminor, &
                                                            kminor_start(imnr), kminor_start(imnr)+(gptE-gptS), &
-                                                           jeta(:,iflav,icol,ilay), jtemp(icol,ilay))
-              tau(gptS:gptE,ilay,icol) = tau(gptS:gptE,ilay,icol) + tau_minor(gptS:gptE)
+                                                           jeta(:,icol,ilay,iflav), jtemp(icol,ilay))
+              tau(icol,ilay,gptS:gptE) = tau(icol,ilay,gptS:gptE) + tau_minor(gptS:gptE)
             enddo
           end if
         enddo
       enddo
     end if
+
   end subroutine gas_optical_depths_minor
   ! ----------------------------------------------------------
   !
@@ -442,37 +443,39 @@ subroutine compute_tau_rayleigh(ncol,nlay,nbnd,ngpt,         &
     integer,                                     intent(in ) :: ngas,nflav,neta,npres,ntemp
     integer,     dimension(2,ngpt),              intent(in ) :: gpoint_flavor
     integer,     dimension(2,nbnd),              intent(in ) :: band_lims_gpt ! start and end g-point for each band
-    real(wp),    dimension(ngpt,neta,ntemp,2),   intent(in ) :: krayl
+    real(wp),    dimension(ntemp,neta,ngpt,2),   intent(in ) :: krayl
     integer,                                     intent(in ) :: idx_h2o
     real(wp),    dimension(ncol,nlay),           intent(in ) :: col_dry
     real(wp),    dimension(ncol,nlay,0:ngas),    intent(in ) :: col_gas
-    real(wp),    dimension(2,2,nflav,ncol,nlay), intent(in ) :: fminor
-    integer,     dimension(2,  nflav,ncol,nlay), intent(in ) :: jeta
+    real(wp),    dimension(2,2,ncol,nlay,nflav), intent(in ) :: fminor
+    integer,     dimension(2,  ncol,nlay,nflav), intent(in ) :: jeta
     logical(wl), dimension(ncol,nlay),           intent(in ) :: tropo
     integer,     dimension(ncol,nlay),           intent(in ) :: jtemp
     ! outputs
-    real(wp),    dimension(ngpt,nlay,ncol),      intent(out) :: tau_rayleigh
+    real(wp),    dimension(ncol,nlay,ngpt),      intent(out) :: tau_rayleigh
     ! -----------------
     ! local variables
     real(wp) :: k(ngpt) ! rayleigh scattering coefficient
-    integer  :: icol, ilay, iflav, ibnd, gptS, gptE
+    integer  :: icol, ilay, iflav, ibnd, igpt, gptS, gptE
     integer  :: itropo
     ! -----------------
-    do ilay = 1, nlay
-      do icol = 1, ncol
-        itropo = merge(1,2,tropo(icol,ilay)) ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
-        do ibnd = 1, nbnd
-          gptS = band_lims_gpt(1, ibnd)
-          gptE = band_lims_gpt(2, ibnd)
+
+    do ibnd = 1, nbnd
+      gptS = band_lims_gpt(1, ibnd)
+      gptE = band_lims_gpt(2, ibnd)
+      do ilay = 1, nlay
+        do icol = 1, ncol
+          itropo = merge(1,2,tropo(icol,ilay)) ! itropo = 1 lower atmosphere;itropo = 2 upper atmosphere
           iflav = gpoint_flavor(itropo, gptS) !eta interpolation depends on band's flavor
-          k(gptS:gptE) = interpolate2D_byflav(fminor(:,:,iflav,icol,ilay), &
+          k(gptS:gptE) = interpolate2D_byflav(fminor(:,:,icol,ilay,iflav), &
                                               krayl(:,:,:,itropo),      &
-                                              gptS, gptE, jeta(:,iflav,icol,ilay), jtemp(icol,ilay))
-          tau_rayleigh(gptS:gptE,ilay,icol) = k(gptS:gptE) * &
+                                              gptS, gptE, jeta(:,icol,ilay,iflav), jtemp(icol,ilay))
+          tau_rayleigh(icol,ilay,gptS:gptE) = k(gptS:gptE) * &
                                               (col_gas(icol,ilay,idx_h2o)+col_dry(icol,ilay))
         end do
       end do
     end do
+
   end subroutine compute_tau_rayleigh
 
   ! ----------------------------------------------------------
@@ -491,23 +494,22 @@ subroutine compute_Planck_source(                        &
     real(wp),    dimension(ncol       ),        intent(in) :: tsfc
     integer,                                    intent(in) :: sfc_lay
     ! Interpolation variables
-    real(wp),    dimension(2,2,2,nflav,ncol,nlay), intent(in) :: fmajor
-    integer,     dimension(2,    nflav,ncol,nlay), intent(in) :: jeta
+    real(wp),    dimension(2,2,2,ncol,nlay,nflav), intent(in) :: fmajor
+    integer,     dimension(2,    ncol,nlay,nflav), intent(in) :: jeta
     logical(wl), dimension(            ncol,nlay), intent(in) :: tropo
     integer,     dimension(            ncol,nlay), intent(in) :: jtemp, jpress
     ! Table-specific
     integer, dimension(ngpt),                     intent(in) :: gpoint_bands ! start and end g-point for each band
     integer, dimension(2, nbnd),                  intent(in) :: band_lims_gpt ! start and end g-point for each band
     real(wp),                                     intent(in) :: temp_ref_min, totplnk_delta
-    real(wp), dimension(ngpt,neta,npres+1,ntemp), intent(in) :: pfracin
+    real(wp), dimension(ntemp,neta,npres+1,ngpt), intent(in) :: pfracin
     real(wp), dimension(nPlanckTemp,nbnd),        intent(in) :: totplnk
     integer,  dimension(2,ngpt),                  intent(in) :: gpoint_flavor
 
-    real(wp), dimension(ngpt,     ncol), intent(out) :: sfc_src
-    real(wp), dimension(ngpt,nlay,ncol), intent(out) :: lay_src
-    real(wp), dimension(ngpt,nlay,ncol), intent(out) :: lev_src_inc, lev_src_dec
-
-    real(wp), dimension(ngpt,     ncol), intent(out) :: sfc_source_Jac
+    real(wp), dimension(ncol,     ngpt), intent(out) :: sfc_src
+    real(wp), dimension(ncol,nlay,ngpt), intent(out) :: lay_src
+    real(wp), dimension(ncol,nlay,ngpt), intent(out) :: lev_src_inc, lev_src_dec
+    real(wp), dimension(ncol,     ngpt), intent(out) :: sfc_source_Jac
     ! -----------------
     ! local
     real(wp), parameter                             :: delta_Tsurf = 1.0_wp
@@ -515,35 +517,36 @@ subroutine compute_Planck_source(                        &
     integer  :: ilay, icol, igpt, ibnd, itropo, iflav
     integer  :: gptS, gptE
     real(wp), dimension(2), parameter :: one = [1._wp, 1._wp]
-    real(wp) :: pfrac          (ngpt,nlay,  ncol)
-    real(wp) :: planck_function(nbnd,nlay+1,ncol)
+    real(wp) :: pfrac          (ncol,nlay  ,ngpt)
+    real(wp) :: planck_function(ncol,nlay+1,nbnd)
     ! -----------------
 
     ! Calculation of fraction of band's Planck irradiance associated with each g-point
-    do icol = 1, ncol
+    do ibnd = 1, nbnd
+      gptS = band_lims_gpt(1, ibnd)
+      gptE = band_lims_gpt(2, ibnd)
       do ilay = 1, nlay
-        ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
-        itropo = merge(1,2,tropo(icol,ilay))
-        do ibnd = 1, nbnd
-          gptS = band_lims_gpt(1, ibnd)
-          gptE = band_lims_gpt(2, ibnd)
+        do icol = 1, ncol
+          ! itropo = 1 lower atmosphere; itropo = 2 upper atmosphere
+          itropo = merge(1,2,tropo(icol,ilay))
           iflav = gpoint_flavor(itropo, gptS) !eta interpolation depends on band's flavor
-          pfrac(gptS:gptE,ilay,icol) = &
-            ! interpolation in temperature, pressure, and eta
-            interpolate3D_byflav(one, fmajor(:,:,:,iflav,icol,ilay), pfracin, &
+          pfrac(icol,ilay,gptS:gptE) = &
+            ! interpolation in temp-m64 -O3 -g -traceback -heap-arrays -assume
+            ! realloc_lhs -extend-source 132erature, pressure, and eta
+            interpolate3D_byflav(one, fmajor(:,:,:,icol,ilay,iflav), pfracin, &
                           band_lims_gpt(1, ibnd), band_lims_gpt(2, ibnd),                 &
-                          jeta(:,iflav,icol,ilay), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
-        end do ! band
+                          jeta(:,icol,ilay,iflav), jtemp(icol,ilay),jpress(icol,ilay)+itropo)
+        end do ! column
       end do   ! layer
-    end do     ! column
+    end do     ! band
 
     !
     ! Planck function by band for the surface
     ! Compute surface source irradiance for g-point, equals band irradiance x fraction for g-point
     !
     do icol = 1, ncol
-      planck_function(1:nbnd,1,icol) = interpolate1D(tsfc(icol)              , temp_ref_min, totplnk_delta, totplnk)
-      planck_function(1:nbnd,2,icol) = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk)
+      planck_function(icol,1,1:nbnd) = interpolate1D(tsfc(icol), temp_ref_min, totplnk_delta, totplnk)
+      planck_function(icol,2,1:nbnd) = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk) 
       !
       ! Map to g-points
       !
@@ -551,48 +554,58 @@ subroutine compute_Planck_source(                        &
         gptS = band_lims_gpt(1, ibnd)
         gptE = band_lims_gpt(2, ibnd)
         do igpt = gptS, gptE
-          sfc_src       (igpt, icol) = pfrac(igpt,sfc_lay,icol) * planck_function(ibnd,1,icol)
-          sfc_source_Jac(igpt, icol) = pfrac(igpt,sfc_lay,icol) * &
-                                (planck_function(ibnd, 2, icol) - planck_function(ibnd,1,icol))
+            sfc_src(icol,igpt) = pfrac(icol,sfc_lay,igpt) * planck_function(icol,1,ibnd)
+            sfc_source_Jac(icol, igpt) = pfrac(icol,sfc_lay,igpt) * &
+                                (planck_function(icol, 2, ibnd) - planck_function(icol,1,ibnd))
         end do
       end do
-    end do ! icol
+    end do !icol 
 
-    do icol = 1, ncol
-      do ilay = 1, nlay
+    do ilay = 1, nlay
+      do icol = 1, ncol
         ! Compute layer source irradiance for g-point, equals band irradiance x fraction for g-point
-        planck_function(1:nbnd,ilay,icol) = interpolate1D(tlay(icol,ilay), temp_ref_min, totplnk_delta, totplnk)
-        !
-        ! Map to g-points
-        !
-        do ibnd = 1, nbnd
-          gptS = band_lims_gpt(1, ibnd)
-          gptE = band_lims_gpt(2, ibnd)
-          do igpt = gptS, gptE
-            lay_src(igpt,ilay,icol) = pfrac(igpt,ilay,icol) * planck_function(ibnd,ilay,icol)
+        planck_function(icol,ilay,1:nbnd) = interpolate1D(tlay(icol,ilay), temp_ref_min, totplnk_delta, totplnk)
+      end do
+    end do
+
+    !
+    ! Map to g-points
+    !
+    do ibnd = 1, nbnd
+      gptS = band_lims_gpt(1, ibnd)
+      gptE = band_lims_gpt(2, ibnd)
+      do igpt = gptS, gptE
+        do ilay = 1, nlay
+          do icol = 1, ncol    
+            lay_src(icol,ilay,igpt) = pfrac(icol,ilay,igpt) * planck_function(icol,ilay,ibnd)
           end do
         end do
-      end do ! ilay
-    end do ! icol
+      end do
+    end do
 
     ! compute level source irradiances for each g-point, one each for upward and downward paths
-    do icol = 1, ncol
-      planck_function(1:nbnd,       1,icol) = interpolate1D(tlev(icol,     1), temp_ref_min, totplnk_delta, totplnk)
-      do ilay = 1, nlay
-        planck_function(1:nbnd,ilay+1,icol) = interpolate1D(tlev(icol,ilay+1), temp_ref_min, totplnk_delta, totplnk)
-        !
-        ! Map to g-points
-        !
-        do ibnd = 1, nbnd
-          gptS = band_lims_gpt(1, ibnd)
-          gptE = band_lims_gpt(2, ibnd)
-          do igpt = gptS, gptE
-            lev_src_inc(igpt,ilay,icol) = pfrac(igpt,ilay,icol) * planck_function(ibnd,ilay+1,icol)
-            lev_src_dec(igpt,ilay,icol) = pfrac(igpt,ilay,icol) * planck_function(ibnd,ilay,  icol)
+    do ilay = 1, nlay
+      do icol = 1, ncol
+      planck_function(icol,     1,1:nbnd) = interpolate1D(tlev(icol,     1),temp_ref_min, totplnk_delta, totplnk)
+      planck_function(icol,ilay+1,1:nbnd) = interpolate1D(tlev(icol,ilay+1),temp_ref_min, totplnk_delta, totplnk)
+      end do
+    end do
+    
+    !
+    ! Map to g-points
+    !
+    do ibnd = 1, nbnd
+      gptS = band_lims_gpt(1, ibnd)
+      gptE = band_lims_gpt(2, ibnd)
+      do igpt = gptS, gptE
+        do ilay = 1, nlay
+          do icol = 1, ncol
+            lev_src_inc(icol,ilay,igpt) = pfrac(icol,ilay,igpt) *planck_function(icol,ilay+1,ibnd)
+            lev_src_dec(icol,ilay,igpt) = pfrac(icol,ilay,igpt) *planck_function(icol,ilay  ,ibnd)
           end do
         end do
-      end do ! ilay
-    end do ! icol
+      end do
+    end do
 
   end subroutine compute_Planck_source
   ! ----------------------------------------------------------
@@ -632,10 +645,10 @@ pure function interpolate2D(fminor, k, igpt, jeta, jtemp) result(res)
     real(wp)                             :: res ! the result
 
     res =  &
-      fminor(1,1) * k(igpt, jeta(1)  , jtemp  ) + &
-      fminor(2,1) * k(igpt, jeta(1)+1, jtemp  ) + &
-      fminor(1,2) * k(igpt, jeta(2)  , jtemp+1) + &
-      fminor(2,2) * k(igpt, jeta(2)+1, jtemp+1)
+      fminor(1,1) * k(jtemp  , jeta(1)  , igpt) + &
+      fminor(2,1) * k(jtemp  , jeta(1)+1, igpt) + &
+      fminor(1,2) * k(jtemp+1, jeta(2)  , igpt) + &
+      fminor(2,2) * k(jtemp+1, jeta(2)+1, igpt)
   end function interpolate2D
   ! ----------------------------------------------------------
   !   This function returns a range of values from a subset (in gpoint) of the k table
@@ -652,12 +665,14 @@ pure function interpolate2D_byflav(fminor, k, gptS, gptE, jeta, jtemp) result(re
     ! Local variable
     integer :: igpt
     ! each code block is for a different reference temperature
+
     do igpt = 1, gptE-gptS+1
-      res(igpt) = fminor(1,1) * k(gptS+igpt-1, jeta(1)  , jtemp  ) + &
-                  fminor(2,1) * k(gptS+igpt-1, jeta(1)+1, jtemp  ) + &
-                  fminor(1,2) * k(gptS+igpt-1, jeta(2)  , jtemp+1) + &
-                  fminor(2,2) * k(gptS+igpt-1, jeta(2)+1, jtemp+1)
+      res(igpt) = fminor(1,1) * k(jtemp  , jeta(1)  , gptS+igpt-1) + &
+                  fminor(2,1) * k(jtemp  , jeta(1)+1, gptS+igpt-1) + &
+                  fminor(1,2) * k(jtemp+1, jeta(2)  , gptS+igpt-1) + &
+                  fminor(2,2) * k(jtemp+1, jeta(2)+1, gptS+igpt-1)
     end do
+
   end function interpolate2D_byflav
   ! ----------------------------------------------------------
   ! interpolation in temperature, pressure, and eta
@@ -676,15 +691,15 @@ pure function interpolate3D(scaling, fmajor, k, igpt, jeta, jtemp, jpress) resul
     ! each code block is for a different reference temperature
     res =  &
       scaling(1) * &
-      ( fmajor(1,1,1) * k(igpt, jeta(1)  , jpress-1, jtemp  ) + &
-        fmajor(2,1,1) * k(igpt, jeta(1)+1, jpress-1, jtemp  ) + &
-        fmajor(1,2,1) * k(igpt, jeta(1)  , jpress  , jtemp  ) + &
-        fmajor(2,2,1) * k(igpt, jeta(1)+1, jpress  , jtemp  ) ) + &
+      ( fmajor(1,1,1) * k(jtemp, jeta(1)  , jpress-1, igpt  ) + &
+        fmajor(2,1,1) * k(jtemp, jeta(1)+1, jpress-1, igpt  ) + &
+        fmajor(1,2,1) * k(jtemp, jeta(1)  , jpress  , igpt  ) + &
+        fmajor(2,2,1) * k(jtemp, jeta(1)+1, jpress  , igpt  ) ) + &
       scaling(2) * &
-      ( fmajor(1,1,2) * k(igpt, jeta(2)  , jpress-1, jtemp+1) + &
-        fmajor(2,1,2) * k(igpt, jeta(2)+1, jpress-1, jtemp+1) + &
-        fmajor(1,2,2) * k(igpt, jeta(2)  , jpress  , jtemp+1) + &
-        fmajor(2,2,2) * k(igpt, jeta(2)+1, jpress  , jtemp+1) )
+      ( fmajor(1,1,2) * k(jtemp+1, jeta(2)  , jpress-1, igpt) + &
+        fmajor(2,1,2) * k(jtemp+1, jeta(2)+1, jpress-1, igpt) + &
+        fmajor(1,2,2) * k(jtemp+1, jeta(2)  , jpress  , igpt) + &
+        fmajor(2,2,2) * k(jtemp+1, jeta(2)+1, jpress  , igpt) )
   end function interpolate3D
   ! ----------------------------------------------------------
   pure function interpolate3D_byflav(scaling, fmajor, k, gptS, gptE, jeta, jtemp, jpress) result(res)
@@ -706,77 +721,16 @@ pure function interpolate3D_byflav(scaling, fmajor, k, gptS, gptE, jeta, jtemp,
     do igpt = 1, gptE-gptS+1
       res(igpt) =  &
         scaling(1) * &
-        ( fmajor(1,1,1) * k(gptS+igpt-1, jeta(1)  , jpress-1, jtemp  ) + &
-          fmajor(2,1,1) * k(gptS+igpt-1, jeta(1)+1, jpress-1, jtemp  ) + &
-          fmajor(1,2,1) * k(gptS+igpt-1, jeta(1)  , jpress  , jtemp  ) + &
-          fmajor(2,2,1) * k(gptS+igpt-1, jeta(1)+1, jpress  , jtemp  ) ) + &
+        ( fmajor(1,1,1) * k(jtemp, jeta(1)  , jpress-1, gptS+igpt-1 ) + &
+          fmajor(2,1,1) * k(jtemp, jeta(1)+1, jpress-1, gptS+igpt-1 ) + &
+          fmajor(1,2,1) * k(jtemp, jeta(1)  , jpress  , gptS+igpt-1 ) + &
+          fmajor(2,2,1) * k(jtemp, jeta(1)+1, jpress  , gptS+igpt-1 ) ) + &
         scaling(2) * &
-        ( fmajor(1,1,2) * k(gptS+igpt-1, jeta(2)  , jpress-1, jtemp+1) + &
-          fmajor(2,1,2) * k(gptS+igpt-1, jeta(2)+1, jpress-1, jtemp+1) + &
-          fmajor(1,2,2) * k(gptS+igpt-1, jeta(2)  , jpress  , jtemp+1) + &
-          fmajor(2,2,2) * k(gptS+igpt-1, jeta(2)+1, jpress  , jtemp+1) )
+        ( fmajor(1,1,2) * k(jtemp+1, jeta(2)  , jpress-1, gptS+igpt-1) + &
+          fmajor(2,1,2) * k(jtemp+1, jeta(2)+1, jpress-1, gptS+igpt-1) + &
+          fmajor(1,2,2) * k(jtemp+1, jeta(2)  , jpress  , gptS+igpt-1) + &
+          fmajor(2,2,2) * k(jtemp+1, jeta(2)+1, jpress  , gptS+igpt-1) )
     end do
   end function interpolate3D_byflav
-  ! ----------------------------------------------------------
-  !
-  ! Combine absoprtion and Rayleigh optical depths for total tau, ssa, g
-  !
-  subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau, ssa, g) &
-      bind(C, name="combine_and_reorder_2str")
-    integer,                             intent(in) :: ncol, nlay, ngpt
-    real(wp), dimension(ngpt,nlay,ncol), intent(in   ) :: tau_abs, tau_rayleigh
-    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau, ssa, g ! inout because components are allocated
-    ! -----------------------
-    integer  :: icol, ilay, igpt
-    real(wp) :: t
-    ! -----------------------
-    call zero_array(ncol, nlay, ngpt, g)
-    do ilay = 1, nlay
-      do igpt = 1, ngpt
-        do icol = 1, ncol
-           t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol)
-           tau(icol,ilay,igpt) = t
-           if(t > 2._wp * tiny(t)) then
-             ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t
-           else
-             ssa(icol,ilay,igpt) = 0._wp
-           end if
-        end do
-      end do
-    end do
-  end subroutine combine_and_reorder_2str
-  ! ----------------------------------------------------------
-  !
-  ! Combine absoprtion and Rayleigh optical depths for total tau, ssa, p
-  !   using Rayleigh scattering phase function
-  !
-  pure subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_rayleigh, tau, ssa, p) &
-      bind(C, name="combine_and_reorder_nstr")
-    integer, intent(in) :: ncol, nlay, ngpt, nmom
-    real(wp), dimension(ngpt,nlay,ncol), intent(in ) :: tau_abs, tau_rayleigh
-    real(wp), dimension(ncol,nlay,ngpt), intent(inout) :: tau, ssa
-    real(wp), dimension(ncol,nlay,ngpt,nmom), &
-                                         intent(inout) :: p
-    ! -----------------------
-    integer :: icol, ilay, igpt, imom
-    real(wp) :: t
-    ! -----------------------
-    do icol = 1, ncol
-      do ilay = 1, nlay
-        do igpt = 1, ngpt
-          t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol)
-          tau(icol,ilay,igpt) = t
-          if(t > 2._wp * tiny(t)) then
-            ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t
-          else
-            ssa(icol,ilay,igpt) = 0._wp
-          end if
-          do imom = 1, nmom
-            p(imom,icol,ilay,igpt) = 0.0_wp
-          end do
-          if(nmom >= 2) p(2,icol,ilay,igpt) = 0.1_wp
-        end do
-      end do
-    end do
-  end subroutine combine_and_reorder_nstr
+
 end module mo_gas_optics_kernels
diff --git a/rrtmgp/mo_gas_optics_rrtmgp.F90 b/rrtmgp/mo_gas_optics_rrtmgp.F90
index 8c7eb011f..2197de265 100644
--- a/rrtmgp/mo_gas_optics_rrtmgp.F90
+++ b/rrtmgp/mo_gas_optics_rrtmgp.F90
@@ -27,8 +27,7 @@ module mo_gas_optics_rrtmgp
   use mo_optical_props,      only: ty_optical_props
   use mo_source_functions,   only: ty_source_func_lw
   use mo_gas_optics_kernels, only: interpolation,                                                       &
-                                   compute_tau_absorption, compute_tau_rayleigh, compute_Planck_source, &
-                                   combine_and_reorder_2str, combine_and_reorder_nstr
+                                   compute_tau_absorption, compute_tau_rayleigh, compute_Planck_source
   use mo_rrtmgp_constants,   only: avogad, m_dry, m_h2o, grav
   use mo_rrtmgp_util_string, only: lower_case, string_in_array, string_loc_in_array
   use mo_gas_concentrations, only: ty_gas_concs
@@ -244,8 +243,8 @@ function gas_optics_int(this,                             &
     ! Interpolation coefficients for use in source function
     integer,     dimension(size(play,dim=1), size(play,dim=2)) :: jtemp, jpress
     logical(wl), dimension(size(play,dim=1), size(play,dim=2)) :: tropo
-    real(wp),    dimension(2,2,2,get_nflav(this),size(play,dim=1), size(play,dim=2)) :: fmajor
-    integer,     dimension(2,    get_nflav(this),size(play,dim=1), size(play,dim=2)) :: jeta
+    real(wp),    dimension(2,2,2,size(play,dim=1),size(play,dim=2), get_nflav(this)) :: fmajor
+    integer,     dimension(2,    size(play,dim=1),size(play,dim=2), get_nflav(this)) :: jeta
 
     integer :: ncol, nlay, ngpt, nband
     ! ----------------------------------------------------------
@@ -265,7 +264,6 @@ function gas_optics_int(this,                             &
                                  jtemp, jpress, jeta, tropo, fmajor, &
                                  col_dry)
     if(error_msg  /= '') return
-
     ! ----------------------------------------------------------
     !
     ! External source -- check arrays sizes and values
@@ -355,8 +353,8 @@ function gas_optics_ext(this,                         &
     ! Interpolation coefficients for use in source function
     integer,     dimension(size(play,dim=1), size(play,dim=2)) :: jtemp, jpress
     logical(wl), dimension(size(play,dim=1), size(play,dim=2)) :: tropo
-    real(wp),    dimension(2,2,2,get_nflav(this),size(play,dim=1), size(play,dim=2)) :: fmajor
-    integer,     dimension(2,    get_nflav(this),size(play,dim=1), size(play,dim=2)) :: jeta
+    real(wp),    dimension(2,2,2,size(play,dim=1),size(play,dim=2), get_nflav(this)) :: fmajor
+    integer,     dimension(2,    size(play,dim=1),size(play,dim=2), get_nflav(this)) :: jeta
 
     integer :: ncol, nlay, ngpt, nband, ngas, nflav
     integer :: igpt, icol
@@ -425,9 +423,9 @@ function compute_gas_taus(this,                       &
     class(ty_optical_props_arry),     intent(inout) :: optical_props !inout because components are allocated
     ! Interpolation coefficients for use in internal source function
     integer,     dimension(                      ncol, nlay), intent(  out) :: jtemp, jpress
-    integer,     dimension(2,    get_nflav(this),ncol, nlay), intent(  out) :: jeta
+    integer,     dimension(2,    ncol, nlay,get_nflav(this)), intent(  out) :: jeta
     logical(wl), dimension(                      ncol, nlay), intent(  out) :: tropo
-    real(wp),    dimension(2,2,2,get_nflav(this),ncol, nlay), intent(  out) :: fmajor
+    real(wp),    dimension(2,2,2,ncol, nlay,get_nflav(this)), intent(  out) :: fmajor
     character(len=128)                                         :: error_msg
 
     ! Optional inputs
@@ -435,7 +433,7 @@ function compute_gas_taus(this,                       &
                            optional, target :: col_dry ! Column dry amount; dim(ncol,nlay)
     ! ----------------------------------------------------------
     ! Local variables
-    real(wp), dimension(ngpt,nlay,ncol) :: tau, tau_rayleigh  ! absorption, Rayleigh scattering optical depths
+    real(wp), dimension(ncol,nlay,ngpt) :: tau, tau_rayleigh  ! absorption, Rayleigh scattering optical depths
     ! Number of molecules per cm^2
     real(wp), dimension(ncol,nlay), target  :: col_dry_arr
     real(wp), dimension(:,:),       pointer :: col_dry_wk
@@ -444,11 +442,11 @@ function compute_gas_taus(this,                       &
     !
     real(wp), dimension(ncol,nlay,  this%get_ngas()) :: vmr     ! volume mixing ratios
     real(wp), dimension(ncol,nlay,0:this%get_ngas()) :: col_gas ! column amounts for each gas, plus col_dry
-    real(wp), dimension(2,    get_nflav(this),ncol,nlay) :: col_mix ! combination of major species's column amounts
+    real(wp), dimension(2,    ncol,nlay,get_nflav(this)) :: col_mix ! combination of major species's column amounts
                                                          ! index(1) : reference temperature level
                                                          ! index(2) : flavor
                                                          ! index(3) : layer
-    real(wp), dimension(2,2,  get_nflav(this),ncol,nlay) :: fminor ! interpolation fractions for minor species
+    real(wp), dimension(2,2,  ncol,nlay,get_nflav(this)) :: fminor ! interpolation fractions for minor species
                                                           ! index(1) : reference eta level (temperature dependent)
                                                           ! index(2) : reference temperature level
                                                           ! index(3) : flavor
@@ -478,8 +476,8 @@ function compute_gas_taus(this,                       &
     !
     ! Check input data sizes and values
     !
-    !$acc enter data copyin(play,plev,tlay)
-    !$omp target enter data map(to:play, plev, tlay)
+    !$acc        data copyin(play,plev,tlay) create(   vmr,col_gas)
+    !$omp target data map(to:play,plev,tlay) map(alloc:vmr,col_gas)
     if(check_extents) then
       if(.not. extents_are(play, ncol, nlay  )) &
         error_msg = "gas_optics(): array play has wrong size"
@@ -496,61 +494,70 @@ function compute_gas_taus(this,                       &
           error_msg = "gas_optics(): array col_dry has wrong size"
       end if
     end if
-    if(error_msg  /= '') return
 
-    if(check_values) then
-      if(any_vals_outside(play, this%press_ref_min,this%press_ref_max)) &
-        error_msg = "gas_optics(): array play has values outside range"
-      if(any_vals_less_than(plev, 0._wp)) &
-        error_msg = "gas_optics(): array plev has values outside range"
-      if(any_vals_outside(tlay, this%temp_ref_min,  this%temp_ref_max)) &
-        error_msg = "gas_optics(): array tlay has values outside range"
-      if(present(col_dry)) then
-        if(any_vals_less_than(col_dry, 0._wp)) &
-          error_msg = "gas_optics(): array col_dry has values outside range"
+    if(error_msg == '') then
+      if(check_values) then
+        if(any_vals_outside(play, this%press_ref_min,this%press_ref_max)) &
+          error_msg = "gas_optics(): array play has values outside range"
+        if(any_vals_less_than(plev, 0._wp)) &
+          error_msg = "gas_optics(): array plev has values outside range"
+        if(any_vals_outside(tlay, this%temp_ref_min,  this%temp_ref_max)) &
+          error_msg = "gas_optics(): array tlay has values outside range"
+        if(present(col_dry)) then
+          if(any_vals_less_than(col_dry, 0._wp)) &
+            error_msg = "gas_optics(): array col_dry has values outside range"
+        end if
       end if
     end if
-    if(error_msg  /= '') return
-
 
     ! ----------------------------------------------------------
-    ngas  = this%get_ngas()
-    nflav = get_nflav(this)
-    neta  = this%get_neta()
-    npres = this%get_npres()
-    ntemp = this%get_ntemp()
-    ! number of minor contributors, total num absorption coeffs
-    nminorlower  = size(this%minor_scales_with_density_lower)
-    nminorklower = size(this%kminor_lower, 1)
-    nminorupper  = size(this%minor_scales_with_density_upper)
-    nminorkupper = size(this%kminor_upper, 1)
-    !
-    ! Fill out the array of volume mixing ratios
-    !
-    !$acc enter data create(vmr)
-    !$omp target enter data map(alloc:vmr)
-    do igas = 1, ngas
+    if(error_msg == '') then
+      ngas  = this%get_ngas()
+      nflav = get_nflav(this)
+      neta  = this%get_neta()
+      npres = this%get_npres()
+      ntemp = this%get_ntemp()
+      ! number of minor contributors, total num absorption coeffs
+      nminorlower  = size(this%minor_scales_with_density_lower)
+      nminorklower = size(this%kminor_lower, 3)
+      nminorupper  = size(this%minor_scales_with_density_upper)
+      nminorkupper = size(this%kminor_upper, 3)
       !
-      ! Get vmr if  gas is provided in ty_gas_concs
+      ! Fill out the array of volume mixing ratios
       !
-      if (any (lower_case(this%gas_names(igas)) == gas_desc%gas_name(:))) then
-         error_msg = gas_desc%get_vmr(this%gas_names(igas), vmr(:,:,igas))
-         if (error_msg /= '') return
-      endif
-    end do
+      do igas = 1, ngas
+        !
+        ! Get vmr if  gas is provided in ty_gas_concs
+        !
+        if (any (lower_case(this%gas_names(igas)) == gas_desc%gas_name(:))) then
+          error_msg = gas_desc%get_vmr(this%gas_names(igas), vmr(:,:,igas))
+        endif
+      end do
+    end if
 
+    if(error_msg == '') then
+
+    select type(optical_props)
+      type is (ty_optical_props_1scl)
+        !$acc        enter data copyin(optical_props) create(   optical_props%tau)
+        !$omp target enter data                       map(alloc:optical_props%tau)
+      type is (ty_optical_props_2str)
+        !$acc        enter data copyin(optical_props) create(   optical_props%tau, optical_props%ssa, optical_props%g)
+        !$omp target enter data                       map(alloc:optical_props%tau, optical_props%ssa, optical_props%g)
+      type is (ty_optical_props_nstr)
+        !$acc        enter data copyin(optical_props) create(   optical_props%tau, optical_props%ssa, optical_props%p)
+        !$omp target enter data                       map(alloc:optical_props%tau, optical_props%ssa, optical_props%p)
+    end select
     !
     ! Compute dry air column amounts (number of molecule per cm^2) if user hasn't provided them
     !
     idx_h2o = string_loc_in_array('h2o', this%gas_names)
-    !$acc enter data create(col_gas)
-    !$omp target enter data map(alloc:col_gas)
     if (present(col_dry)) then
-      !$acc enter data copyin(col_dry)
+      !$acc        enter data copyin(col_dry)
       !$omp target enter data map(to:col_dry)
       col_dry_wk => col_dry
     else
-      !$acc enter data create(col_dry_arr)
+      !$acc        enter data create(   col_dry_arr)
       !$omp target enter data map(alloc:col_dry_arr)
       col_dry_arr = get_col_dry(vmr(:,:,idx_h2o), plev) ! dry air column amounts computation
       col_dry_wk => col_dry_arr
@@ -574,22 +581,11 @@ function compute_gas_taus(this,                       &
         end do
       end do
     end do
-    !$acc exit data delete(vmr)
-    !$omp target exit data map(release:vmr)
-
     !
     ! ---- calculate gas optical depths ----
     !
-    !$acc enter data create(jtemp, jpress, jeta, tropo, fmajor)
-    !$omp target enter data map(alloc:jtemp, jpress, jeta, tropo, fmajor)
-    !$acc enter data create(tau, tau_rayleigh)
-    !$omp target enter data map(alloc:tau, tau_rayleigh)
-    !$acc enter data create(col_mix, fminor)
-    !$omp target enter data map(alloc:col_mix, fminor)
-    !$acc enter data copyin(this)
-    !$acc enter data copyin(this%gpoint_flavor)
-    !$omp target enter data map(to:this%gpoint_flavor)
-    call zero_array(ngpt, nlay, ncol, tau)
+    !$acc        data copyout( jtemp, jpress, jeta, tropo, fmajor) create(   col_mix, fminor)
+    !$omp target data map(from:jtemp, jpress, jeta, tropo, fmajor) map(alloc:col_mix, fminor)
     call interpolation(               &
             ncol,nlay,                &        ! problem dimensions
             ngas, nflav, neta, npres, ntemp, & ! interpolation dimensions
@@ -609,37 +605,38 @@ function compute_gas_taus(this,                       &
             col_mix,      &
             tropo,        &
             jeta,jpress)
-    call compute_tau_absorption(                     &
-            ncol,nlay,nband,ngpt,                    &  ! dimensions
-            ngas,nflav,neta,npres,ntemp,             &
-            nminorlower, nminorklower,               & ! number of minor contributors, total num absorption coeffs
-            nminorupper, nminorkupper,               &
-            idx_h2o,                                 &
-            this%gpoint_flavor,                      &
-            this%get_band_lims_gpoint(),             &
-            this%kmajor,                             &
-            this%kminor_lower,                       &
-            this%kminor_upper,                       &
-            this%minor_limits_gpt_lower,             &
-            this%minor_limits_gpt_upper,             &
-            this%minor_scales_with_density_lower,    &
-            this%minor_scales_with_density_upper,    &
-            this%scale_by_complement_lower,          &
-            this%scale_by_complement_upper,          &
-            this%idx_minor_lower,                    &
-            this%idx_minor_upper,                    &
-            this%idx_minor_scaling_lower,            &
-            this%idx_minor_scaling_upper,            &
-            this%kminor_start_lower,                 &
-            this%kminor_start_upper,                 &
-            tropo,                                   &
-            col_mix,fmajor,fminor,                   &
-            play,tlay,col_gas,                       &
-            jeta,jtemp,jpress,                       &
-            tau)
     if (allocated(this%krayl)) then
-      !$acc enter data copyin(this%krayl)
-      !$omp target enter data map(to:this%krayl)
+      !$acc        data copyin(this%gpoint_flavor, this%krayl)    create(tau, tau_rayleigh)
+      !$omp target data map(to:this%gpoint_flavor, this%krayl) map(alloc:tau, tau_rayleigh)
+      call zero_array(ngpt, nlay, ncol, tau)
+      call compute_tau_absorption(                     &
+              ncol,nlay,nband,ngpt,                    &  ! dimensions
+              ngas,nflav,neta,npres,ntemp,             &
+              nminorlower, nminorklower,               & ! number of minor contributors, total num absorption coeffs
+              nminorupper, nminorkupper,               &
+              idx_h2o,                                 &
+              this%gpoint_flavor,                      &
+              this%get_band_lims_gpoint(),             &
+              this%kmajor,                             &
+              this%kminor_lower,                       &
+              this%kminor_upper,                       &
+              this%minor_limits_gpt_lower,             &
+              this%minor_limits_gpt_upper,             &
+              this%minor_scales_with_density_lower,    &
+              this%minor_scales_with_density_upper,    &
+              this%scale_by_complement_lower,          &
+              this%scale_by_complement_upper,          &
+              this%idx_minor_lower,                    &
+              this%idx_minor_upper,                    &
+              this%idx_minor_scaling_lower,            &
+              this%idx_minor_scaling_upper,            &
+              this%kminor_start_lower,                 &
+              this%kminor_start_upper,                 &
+              tropo,                                   &
+              col_mix,fmajor,fminor,                   &
+              play,tlay,col_gas,                       &
+              jeta,jtemp,jpress,                       &
+              tau)
       call compute_tau_rayleigh(         & !Rayleigh scattering optical depths
             ncol,nlay,nband,ngpt,        &
             ngas,nflav,neta,npres,ntemp, & ! dimensions
@@ -649,23 +646,69 @@ function compute_gas_taus(this,                       &
             idx_h2o, col_dry_wk,col_gas, &
             fminor,jeta,tropo,jtemp,     & ! local input
             tau_rayleigh)
-      !$acc exit data delete(this%krayl)
-      !$omp target exit data map(release:this%krayl)
+      call combine_abs_and_rayleigh(tau, tau_rayleigh, optical_props)
+      !$acc end        data
+      !$omp end target data
+    else
+      call zero_array(ngpt, nlay, ncol, optical_props%tau)
+      call compute_tau_absorption(                     &
+              ncol,nlay,nband,ngpt,                    &  ! dimensions
+              ngas,nflav,neta,npres,ntemp,             &
+              nminorlower, nminorklower,               & ! number of minor contributors, total num absorption coeffs
+              nminorupper, nminorkupper,               &
+              idx_h2o,                                 &
+              this%gpoint_flavor,                      &
+              this%get_band_lims_gpoint(),             &
+              this%kmajor,                             &
+              this%kminor_lower,                       &
+              this%kminor_upper,                       &
+              this%minor_limits_gpt_lower,             &
+              this%minor_limits_gpt_upper,             &
+              this%minor_scales_with_density_lower,    &
+              this%minor_scales_with_density_upper,    &
+              this%scale_by_complement_lower,          &
+              this%scale_by_complement_upper,          &
+              this%idx_minor_lower,                    &
+              this%idx_minor_upper,                    &
+              this%idx_minor_scaling_lower,            &
+              this%idx_minor_scaling_upper,            &
+              this%kminor_start_lower,                 &
+              this%kminor_start_upper,                 &
+              tropo,                                   &
+              col_mix,fmajor,fminor,                   &
+              play,tlay,col_gas,                       &
+              jeta,jtemp,jpress,                       &
+              optical_props%tau)  !
+      select type(optical_props)
+        type is (ty_optical_props_2str)
+          call zero_array(ncol, nlay, ngpt, optical_props%ssa)
+          call zero_array(ncol, nlay, ngpt, optical_props%g)
+        type is (ty_optical_props_nstr)
+          call zero_array(ncol, nlay, ngpt, optical_props%ssa)
+          call zero_array(optical_props%get_nmom(), &
+                          ncol, nlay, ngpt, optical_props%p)
+      end select
     end if
-    if (error_msg /= '') return
-
-    ! Combine optical depths and reorder for radiative transfer solver.
-    call combine_and_reorder(tau, tau_rayleigh, allocated(this%krayl), optical_props)
-    !$acc exit data delete(play, tlay, plev)
-    !$omp target exit data map(release:play, tlay, plev)
-    !$acc exit data delete(tau, tau_rayleigh)
-    !$omp target exit data map(release:tau, tau_rayleigh)
-    !$acc exit data delete(col_dry_wk, col_gas, col_mix, fminor)
-    !$omp target exit data map(release:col_dry_wk, col_gas, col_mix, fminor)
-    !$acc exit data delete(this%gpoint_flavor)
-    !$omp target exit data map(release:this%gpoint_flavor)
-    !$acc exit data copyout(jtemp, jpress, jeta, tropo, fmajor)
-    !$omp target exit data map(from:jtemp, jpress, jeta, tropo, fmajor)
+    !$acc end        data
+    !$omp end target data
+    end if
+    !$acc end        data
+    !$omp end target data
+
+    !$acc        exit data delete(     col_dry_wk)
+    !$omp target exit data map(release:col_dry_wk)
+
+    select type(optical_props)
+      type is (ty_optical_props_1scl)
+        !$acc        exit data delete(optical_props) copyout( optical_props%tau)
+        !$omp target exit data                       map(from:optical_props%tau)
+      type is (ty_optical_props_2str)
+        !$acc        exit data delete(optical_props) copyout( optical_props%tau, optical_props%ssa, optical_props%g)
+        !$omp target exit data                       map(from:optical_props%tau, optical_props%ssa, optical_props%g)
+      type is (ty_optical_props_nstr)
+        !$acc        exit data delete(optical_props) copyout( optical_props%tau, optical_props%ssa, optical_props%p)
+        !$omp target exit data                       map(from:optical_props%tau, optical_props%ssa, optical_props%p)
+    end select
   end function compute_gas_taus
   !------------------------------------------------------------------------------------------
   !
@@ -768,9 +811,9 @@ function source(this,                               &
     ! Interplation coefficients
     integer,     dimension(ncol,nlay),     intent(in   ) :: jtemp, jpress
     logical(wl), dimension(ncol,nlay),     intent(in   ) :: tropo
-    real(wp),    dimension(2,2,2,get_nflav(this),ncol,nlay),  &
+    real(wp),    dimension(2,2,2,ncol,nlay,get_nflav(this)),  &
                                            intent(in   ) :: fmajor
-    integer,     dimension(2,    get_nflav(this),ncol,nlay),  &
+    integer,     dimension(2,    ncol,nlay,get_nflav(this)),  &
                                            intent(in   ) :: jeta
     class(ty_source_func_lw    ),          intent(inout) :: sources
     real(wp), dimension(ncol,nlay+1),      intent(in   ), &
@@ -779,10 +822,6 @@ function source(this,                               &
     ! ----------------------------------------------------------
     logical(wl)                                  :: top_at_1
     integer                                      :: icol, ilay, igpt
-    real(wp), dimension(ngpt,nlay,ncol)          :: lay_source_t, lev_source_inc_t, lev_source_dec_t
-    real(wp), dimension(ngpt,     ncol)          :: sfc_source_t
-    real(wp), dimension(ngpt,     ncol)          :: sfc_source_Jac
-
     ! Variables for temperature at layer edges [K] (ncol, nlay+1)
     real(wp), dimension(   ncol,nlay+1), target  :: tlev_arr
     real(wp), dimension(:,:),            pointer :: tlev_wk
@@ -822,15 +861,10 @@ function source(this,                               &
     !-------------------------------------------------------------------
     ! Compute internal (Planck) source functions at layers and levels,
     !  which depend on mapping from spectral space that creates k-distribution.
-    !$acc enter data copyin(sources)
-    !$acc enter data create(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source)
-    !$omp target enter data map(alloc:sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source)
-    !$acc enter data create(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t)
-    !$omp target enter data map(alloc:sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t)
-    !$acc enter data create(sfc_source_Jac)
-    !$omp target enter data map(alloc:sfc_source_Jac)
-    !$acc enter data create(sources%sfc_source_Jac)
-    !$omp target enter data map(alloc:sources%sfc_source_Jac)
+    !$acc        data copyin(sources) copyout( sources%lay_source, sources%lev_source_inc, sources%lev_source_dec) &
+    !$acc                             copyout( sources%sfc_source, sources%sfc_source_Jac)
+    !$omp target data                 map(from:sources%lay_source, sources%lev_source_inc, sources%lev_source_dec) &
+    !$omp                             map(from:sources%sfc_source, sources%sfc_source_Jac)
 
     !$acc kernels copyout(top_at_1)
     !$omp target map(from:top_at_1)
@@ -844,31 +878,10 @@ function source(this,                               &
                 fmajor, jeta, tropo, jtemp, jpress,                    &
                 this%get_gpoint_bands(), this%get_band_lims_gpoint(), this%planck_frac, this%temp_ref_min,&
                 this%totplnk_delta, this%totplnk, this%gpoint_flavor,  &
-                sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t, &
-                sfc_source_Jac)
-    !$acc parallel loop collapse(2)
-    !$omp target teams distribute parallel do simd collapse(2)
-    do igpt = 1, ngpt
-      do icol = 1, ncol
-        sources%sfc_source    (icol,igpt) = sfc_source_t  (igpt,icol)
-        sources%sfc_source_Jac(icol,igpt) = sfc_source_Jac(igpt,icol)
-      end do
-    end do
-    call reorder123x321(lay_source_t, sources%lay_source)
-    call reorder123x321(lev_source_inc_t, sources%lev_source_inc)
-    call reorder123x321(lev_source_dec_t, sources%lev_source_dec)
-    !
-    ! Transposition of a 2D array, for which we don't have a routine in mo_rrtmgp_util_reorder.
-    !
-    !$acc exit data delete(sfc_source_Jac)
-    !$omp target exit data map(release:sfc_source_Jac)
-    !$acc exit data delete(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t)
-    !$omp target exit data map(release:sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t)
-    !$acc exit data copyout(sources%sfc_source_Jac)
-    !$omp target exit data map(from:sources%sfc_source_Jac)
-    !$acc exit data copyout(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source)
-    !$omp target exit data map(from:sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source)
-    !$acc exit data copyout(sources)
+                sources%sfc_source, sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, &
+                sources%sfc_source_Jac)
+    !$acc end        data
+    !$omp end target data
   end function source
   !--------------------------------------------------------------------------------------------------------------------
   !
@@ -928,7 +941,7 @@ function load_int(this, available_gases, gas_names, key_species,  &
                                                              kminor_start_upper
     character(len = 128) :: err_message
     ! ----
-    !$acc enter data create(this)
+    !$acc enter data copyin(this)
     err_message = init_abs_coeffs(this, &
                                   available_gases, &
                                   gas_names, key_species,    &
@@ -952,17 +965,14 @@ function load_int(this, available_gases, gas_names, key_species,  &
     ! Planck function tables
     !
     allocate(this%totplnk          (size(totplnk,    1), size(totplnk,   2)), &
-             this%planck_frac      (size(planck_frac,1), size(planck_frac,2), size(planck_frac,3), size(planck_frac,4)), &
+             this%planck_frac      (size(planck_frac,4), size(planck_frac,2),size(planck_frac,3), size(planck_frac,1)), &
              this%optimal_angle_fit(size(optimal_angle_fit,    1), size(optimal_angle_fit,   2)))
-    !$acc enter data create(this%totplnk, this%planck_frac, this%optimal_angle_fit)
-    !$omp target enter data map(alloc:this%totplnk, this%planck_frac, this%optimal_angle_fit)
-    !$acc kernels
-    !$omp target
     this%totplnk = totplnk
-    this%planck_frac = planck_frac
+!    this%planck_frac = planck_frac
+    this%planck_frac = RESHAPE(planck_frac,(/size(planck_frac,4), size(planck_frac,2), size(planck_frac,3), size(planck_frac,1)/),ORDER =(/4,2,3,1/))
     this%optimal_angle_fit = optimal_angle_fit
-    !$acc end kernels
-    !$omp end target
+    !$acc        enter data copyin(this%totplnk, this%planck_frac, this%optimal_angle_fit)
+    !$omp target enter data map(to:this%totplnk, this%planck_frac, this%optimal_angle_fit)
 
     ! Temperature steps for Planck function interpolation
     !   Assumes that temperature minimum and max are the same for the absorption coefficient grid and the
@@ -1038,7 +1048,7 @@ function load_ext(this, available_gases, gas_names, key_species,        &
 
     integer :: ngpt
     ! ----
-    !$acc enter data create(this)
+    !$acc enter data copyin(this)
     err_message = init_abs_coeffs(this, &
                                   available_gases, &
                                   gas_names, key_species,    &
@@ -1066,7 +1076,7 @@ function load_ext(this, available_gases, gas_names, key_species,        &
     ngpt = size(solar_quiet)
     allocate(this%solar_source_quiet(ngpt), this%solar_source_facular(ngpt), &
              this%solar_source_sunspot(ngpt), this%solar_source(ngpt))
-    !$acc enter data create(this%solar_source_quiet, this%solar_source_facular, this%solar_source_sunspot, this%solar_source)
+    !$acc        enter data create(   this%solar_source_quiet, this%solar_source_facular, this%solar_source_sunspot, this%solar_source)
     !$omp target enter data map(alloc:this%solar_source_quiet, this%solar_source_facular, this%solar_source_sunspot, this%solar_source)
     !$acc kernels
     !$omp target
@@ -1114,6 +1124,7 @@ function init_abs_coeffs(this, &
     real(wp), dimension(:,:,:),   intent(in) :: vmr_ref
     real(wp), dimension(:,:,:,:), intent(in) :: kmajor
     real(wp), dimension(:,:,:),   intent(in) :: kminor_lower, kminor_upper
+    real(wp), dimension(:,:,:), allocatable  :: kminor_lower_t, kminor_upper_t
     character(len=*),   dimension(:), &
                                   intent(in) :: gas_minor, &
                                                 identifier_minor
@@ -1168,6 +1179,7 @@ function init_abs_coeffs(this, &
     !   gas optics and also present in the host model
     !
     this%gas_names = pack(gas_names,mask=gas_is_present)
+    ! Copy-ins below
 
     allocate(vmr_ref_red(size(vmr_ref,dim=1),0:ngas, &
                          size(vmr_ref,dim=3)))
@@ -1178,6 +1190,8 @@ function init_abs_coeffs(this, &
       vmr_ref_red(:,i,:) = vmr_ref(:,idx+1,:)
     enddo
     call move_alloc(vmr_ref_red, this%vmr_ref)
+    !$acc        enter data copyin(this%vmr_ref, this%gas_names)
+    !$omp target enter data map(to:this%vmr_ref, this%gas_names)
     !
     ! Reduce minor arrays so variables only contain minor gases that are available
     ! Reduce size of minor Arrays
@@ -1216,15 +1230,25 @@ function init_abs_coeffs(this, &
                              scaling_gas_upper_red, &
                              this%scale_by_complement_upper, &
                              this%kminor_start_upper)
+   !$acc        enter data copyin(this%minor_limits_gpt_lower, this%minor_limits_gpt_upper)
+   !$omp target enter data map(to:this%minor_limits_gpt_lower, this%minor_limits_gpt_upper)
+   !$acc        enter data copyin(this%minor_scales_with_density_lower, this%minor_scales_with_density_upper)
+   !$omp target enter data map(to:this%minor_scales_with_density_lower, this%minor_scales_with_density_upper)
+   !$acc        enter data copyin(this%scale_by_complement_lower, this%scale_by_complement_upper)
+   !$omp target enter data map(to:this%scale_by_complement_lower, this%scale_by_complement_upper)
+   !$acc        enter data copyin(this%kminor_start_lower, this%kminor_start_upper)
+   !$omp target enter data map(to:this%kminor_start_lower, this%kminor_start_upper)
+   !$acc        enter data copyin(this%kminor_lower, this%kminor_upper)
+   !$omp target enter data map(to:this%kminor_lower, this%kminor_upper)
 
     ! Arrays not reduced by the presence, or lack thereof, of a gas
     allocate(this%press_ref(size(press_ref)), this%temp_ref(size(temp_ref)), &
-             this%kmajor(size(kmajor,1),size(kmajor,2),size(kmajor,3),size(kmajor,4)))
-    this%press_ref = press_ref
-    this%temp_ref  = temp_ref
-    this%kmajor    = kmajor
-    !$acc enter data copyin(this%kmajor)
-    !$omp target enter data map(to:this%kmajor)
+             this%kmajor(size(kmajor,4),size(kmajor,2),size(kmajor,3),size(kmajor,1)))
+    this%press_ref(:) = press_ref(:)
+    this%temp_ref(:)  = temp_ref(:)
+    this%kmajor = RESHAPE(kmajor,(/size(kmajor,4),size(kmajor,2),size(kmajor,3),size(kmajor,1)/), ORDER= (/4,2,3,1/))
+    !$acc        enter data copyin(this%press_ref, this%temp_ref, this%kmajor)
+    !$omp target enter data map(to:this%press_ref, this%temp_ref, this%kmajor)
 
 
     if(allocated(rayl_lower) .neqv. allocated(rayl_upper)) then
@@ -1232,10 +1256,10 @@ function init_abs_coeffs(this, &
       return
     end if
     if (allocated(rayl_lower)) then
-      allocate(this%krayl(size(rayl_lower,dim=1),size(rayl_lower,dim=2),size(rayl_lower,dim=3),2))
-      this%krayl(:,:,:,1) = rayl_lower
-      this%krayl(:,:,:,2) = rayl_upper
-      !$acc enter data copyin(this%krayl)
+      allocate(this%krayl(size(rayl_lower,dim=3),size(rayl_lower,dim=2),size(rayl_lower,dim=1),2))
+      this%krayl(:,:,:,1) = RESHAPE(rayl_lower,(/size(rayl_lower,dim=3),size(rayl_lower,dim=2),size(rayl_lower,dim=1)/),ORDER =(/3,2,1/))
+      this%krayl(:,:,:,2) = RESHAPE(rayl_upper,(/size(rayl_lower,dim=3),size(rayl_lower,dim=2),size(rayl_lower,dim=1)/),ORDER =(/3,2,1/))
+      !$acc        enter data copyin(this%krayl)
       !$omp target enter data map(to:this%krayl)
     end if
 
@@ -1243,23 +1267,22 @@ function init_abs_coeffs(this, &
     ! creates log reference pressure
     allocate(this%press_ref_log(size(this%press_ref)))
     this%press_ref_log(:) = log(this%press_ref(:))
-    !$acc enter data copyin(this%press_ref_log)
+    !$acc        enter data copyin(this%press_ref_log)
     !$omp target enter data map(to:this%press_ref_log)
 
-
     ! log scale of reference pressure
     this%press_ref_trop_log = log(press_ref_trop)
 
     ! Get index of gas (if present) for determining col_gas
-    call create_idx_minor(this%gas_names, gas_minor, identifier_minor, minor_gases_lower_red, &
-      this%idx_minor_lower)
-    call create_idx_minor(this%gas_names, gas_minor, identifier_minor, minor_gases_upper_red, &
-      this%idx_minor_upper)
+    call create_idx_minor(this%gas_names, gas_minor, identifier_minor, minor_gases_lower_red, this%idx_minor_lower)
+    call create_idx_minor(this%gas_names, gas_minor, identifier_minor, minor_gases_upper_red, this%idx_minor_upper)
     ! Get index of gas (if present) that has special treatment in density scaling
-    call create_idx_minor_scaling(this%gas_names, scaling_gas_lower_red, &
-      this%idx_minor_scaling_lower)
-    call create_idx_minor_scaling(this%gas_names, scaling_gas_upper_red, &
-      this%idx_minor_scaling_upper)
+    call create_idx_minor_scaling(this%gas_names, scaling_gas_lower_red, this%idx_minor_scaling_lower)
+    call create_idx_minor_scaling(this%gas_names, scaling_gas_upper_red, this%idx_minor_scaling_upper)
+    !$acc        enter data copyin(this%idx_minor_lower, this%idx_minor_upper)
+    !$omp target enter data map(to:this%idx_minor_lower, this%idx_minor_upper)
+    !$acc        enter data copyin(this%idx_minor_scaling_lower, this%idx_minor_scaling_upper)
+    !$omp target enter data map(to:this%idx_minor_scaling_lower, this%idx_minor_scaling_upper)
 
     ! create flavor list
     ! Reduce (remap) key_species list; checks that all key gases are present in incoming
@@ -1271,6 +1294,7 @@ function init_abs_coeffs(this, &
     call create_flavor(key_species_red, this%flavor)
     ! create gpoint_flavor list
     call create_gpoint_flavor(key_species_red, this%get_gpoint_bands(), this%flavor, this%gpoint_flavor)
+    !Copy-ins at end of subroutine
 
     ! minimum, maximum reference temperature, pressure -- assumes low-to-high ordering
     !   for T, high-to-low ordering for p
@@ -1294,6 +1318,8 @@ function init_abs_coeffs(this, &
         if (this%flavor(i,j) /= 0) this%is_key(this%flavor(i,j)) = .true.
       end do
     end do
+    !$acc        enter data copyin(this%flavor, this%gpoint_flavor, this%is_key)
+    !$omp target enter data map(to:this%flavor, this%gpoint_flavor, this%is_key)
 
   end function init_abs_coeffs
   ! ----------------------------------------------------------------------------------------------------
@@ -1436,8 +1462,8 @@ function get_col_dry(vmr_h2o, plev, latitude) result(col_dry)
     ! ------------------------------------------------
     ncol = size(plev, dim=1)
     nlev = size(plev, dim=2)
-    !$acc enter data create(g0)
-    !$omp target enter data map(alloc:g0)
+    !$acc        data    create(g0)
+    !$omp target data map(alloc:g0)
     if(present(latitude)) then
       ! A purely OpenACC implementation would probably compute g0 within the kernel below
       !$acc parallel loop
@@ -1453,8 +1479,8 @@ function get_col_dry(vmr_h2o, plev, latitude) result(col_dry)
       end do
     end if
 
-    !$acc parallel loop gang vector collapse(2) copyin(plev,vmr_h2o) copyout(col_dry)
-    !$omp target teams distribute parallel do simd collapse(2) map(to:plev, vmr_h2o) map(from:col_dry)
+    !$acc                parallel loop gang vector collapse(2) copyin(plev,vmr_h2o)  copyout(col_dry)
+    !$omp target teams distribute parallel do simd collapse(2) map(to:plev,vmr_h2o) map(from:col_dry)
     do ilev = 1, nlev-1
       do icol = 1, ncol
         delta_plev = abs(plev(icol,ilev) - plev(icol,ilev+1))
@@ -1464,8 +1490,8 @@ function get_col_dry(vmr_h2o, plev, latitude) result(col_dry)
         col_dry(icol,ilev) = 10._wp * delta_plev * avogad * fact/(1000._wp*m_air*100._wp*g0(icol))
       end do
     end do
-    !$acc exit data delete (g0)
-    !$omp target exit data map(release:g0)
+    !$acc end        data
+    !$omp end target data
   end function get_col_dry
   !--------------------------------------------------------------------------------------------------------------------
   !
@@ -1496,8 +1522,8 @@ function compute_optimal_angles(this, optical_props, optimal_angles) result(err_
     !
     ! column transmissivity
     !
-    !$acc parallel loop gang vector collapse(2) copyin(optical_props, optical_props%tau, optical_props%gpt2band) copyout(optimal_angles)
-    !$omp target teams distribute parallel do simd collapse(2) map(to: optical_props%tau, optical_props%gpt2band) map(from:optimal_angles)
+    !$acc                parallel loop gang vector collapse(2) copyin(optical_props, optical_props%tau, optical_props%gpt2band) copyout(optimal_angles)
+    !$omp target teams distribute parallel do simd collapse(2) map(to:optical_props%tau, optical_props%gpt2band) map(from:optimal_angles)
     do icol = 1, ncol
       do igpt = 1, ngpt
         !
@@ -1608,14 +1634,11 @@ subroutine create_idx_minor(gas_names, &
     integer :: idx_mnr
     allocate(idx_minor_atm(size(minor_gases_atm,dim=1)))
     do imnr = 1, size(minor_gases_atm,dim=1) ! loop over minor absorbers in each band
-          ! Find identifying string for minor species in list of possible identifiers (e.g. h2o_slf)
-          idx_mnr     = string_loc_in_array(minor_gases_atm(imnr), identifier_minor)
-          ! Find name of gas associated with minor species identifier (e.g. h2o)
-          idx_minor_atm(imnr) = string_loc_in_array(gas_minor(idx_mnr),    gas_names)
+      ! Find identifying string for minor species in list of possible identifiers (e.g. h2o_slf)
+      idx_mnr     = string_loc_in_array(minor_gases_atm(imnr), identifier_minor)
+      ! Find name of gas associated with minor species identifier (e.g. h2o)
+      idx_minor_atm(imnr) = string_loc_in_array(gas_minor(idx_mnr),    gas_names)
     enddo
-
-    !$acc enter data copyin(idx_minor_atm)
-    !$omp target enter data map(to:idx_minor_atm)
   end subroutine create_idx_minor
 
   ! ---------------------------------------------------------------------------------------
@@ -1633,12 +1656,9 @@ subroutine create_idx_minor_scaling(gas_names, &
     integer :: imnr
     allocate(idx_minor_scaling_atm(size(scaling_gas_atm,dim=1)))
     do imnr = 1, size(scaling_gas_atm,dim=1) ! loop over minor absorbers in each band
-          ! This will be -1 if there's no interacting gas
-          idx_minor_scaling_atm(imnr) = string_loc_in_array(scaling_gas_atm(imnr), gas_names)
+      ! This will be -1 if there's no interacting gas
+      idx_minor_scaling_atm(imnr) = string_loc_in_array(scaling_gas_atm(imnr), gas_names)
     enddo
-
-    !$acc enter data copyin(idx_minor_scaling_atm)
-    !$omp target enter data map(to:idx_minor_scaling_atm)
   end subroutine create_idx_minor_scaling
   ! ---------------------------------------------------------------------------------------
   subroutine create_key_species_reduce(gas_names,gas_names_red, &
@@ -1678,6 +1698,7 @@ subroutine create_key_species_reduce(gas_names,gas_names_red, &
   end subroutine create_key_species_reduce
 
 ! ---------------------------------------------------------------------------------------
+
   subroutine reduce_minor_arrays(available_gases, &
                            gas_names, &
                            gas_minor,identifier_minor,&
@@ -1728,6 +1749,7 @@ subroutine reduce_minor_arrays(available_gases, &
     integer :: icnt, n_elim, ng
     logical, dimension(:), allocatable :: gas_is_present
     integer, dimension(:), allocatable :: indexes
+    real(wp),         dimension(:,:,:), allocatable :: kminor_atm_red_t
 
     nm = size(minor_gases_atm)
     tot_g=0
@@ -1747,13 +1769,14 @@ subroutine reduce_minor_arrays(available_gases, &
              scale_by_complement_atm_red      (red_nm), &
              kminor_start_atm_red             (red_nm))
     allocate(minor_limits_gpt_atm_red(2, red_nm))
-    allocate(kminor_atm_red(tot_g, size(kminor_atm,2), size(kminor_atm,3)))
+    allocate(kminor_atm_red_t(tot_g, size(kminor_atm,2), size(kminor_atm,3)))
+    allocate(kminor_atm_red(size(kminor_atm,3),size(kminor_atm,2),tot_g))
 
     if ((red_nm .eq. nm)) then
       ! Character data not allowed in OpenACC regions?
       minor_gases_atm_red         = minor_gases_atm
       scaling_gas_atm_red         = scaling_gas_atm
-      kminor_atm_red              = kminor_atm
+      kminor_atm_red_t            = kminor_atm
       minor_limits_gpt_atm_red    = minor_limits_gpt_atm
       minor_scales_with_density_atm_red = minor_scales_with_density_atm
       scale_by_complement_atm_red = scale_by_complement_atm
@@ -1781,7 +1804,7 @@ subroutine reduce_minor_arrays(available_gases, &
           kminor_start_atm_red(icnt) = kminor_start_atm(i)-n_elim
           ks = kminor_start_atm_red(icnt)
           do j = 1, ng
-            kminor_atm_red(kminor_start_atm_red(icnt)+j-1,:,:) = &
+            kminor_atm_red_t(kminor_start_atm_red(icnt)+j-1,:,:) = &
               kminor_atm(kminor_start_atm(i)+j-1,:,:)
           enddo
         else
@@ -1789,10 +1812,9 @@ subroutine reduce_minor_arrays(available_gases, &
         endif
       enddo
     endif
-    !$acc enter data copyin(kminor_atm_red, kminor_start_atm_red, minor_limits_gpt_atm_red, &
-    !$acc                   minor_scales_with_density_atm_red, scale_by_complement_atm_red)
-    !$omp target enter data map(to:kminor_atm_red, kminor_start_atm_red, minor_limits_gpt_atm_red, &
-    !$omp                   minor_scales_with_density_atm_red, scale_by_complement_atm_red)
+
+    kminor_atm_red = RESHAPE(kminor_atm_red_t,(/size(kminor_atm_red_t,dim=3),size(kminor_atm_red_t,dim=2),size(kminor_atm_red_t,dim=1)/), ORDER=(/3,2,1/))
+    deallocate(kminor_atm_red_t)
   end subroutine reduce_minor_arrays
 
 ! ---------------------------------------------------------------------------------------
@@ -1839,79 +1861,89 @@ end subroutine create_gpoint_flavor
  ! Utility function to combine optical depths from gas absorption and Rayleigh scattering
  !   (and reorder them for convenience, while we're at it)
  !
- subroutine combine_and_reorder(tau, tau_rayleigh, has_rayleigh, optical_props)
-    real(wp), dimension(:,:,:),   intent(in) :: tau
-    real(wp), dimension(:,:,:),   intent(in) :: tau_rayleigh
-    logical,                      intent(in) :: has_rayleigh
+ subroutine combine_abs_and_rayleigh(tau, tau_rayleigh, optical_props)
+    real(wp), dimension(:,:,:),   intent(in   ) :: tau
+    real(wp), dimension(:,:,:),   intent(in   ) :: tau_rayleigh
     class(ty_optical_props_arry), intent(inout) :: optical_props
 
-    integer :: ncol, nlay, ngpt, nmom
+    integer :: icol, ilay, igpt, ncol, nlay, ngpt, nmom
+    real(wp) :: t
 
-    ncol = size(tau, 3)
+    ncol = size(tau, 1)
     nlay = size(tau, 2)
-    ngpt = size(tau, 1)
-    !$acc enter data copyin(optical_props)
-    if (.not. has_rayleigh) then
-      ! index reorder (ngpt, nlay, ncol) -> (ncol,nlay,gpt)
-      !$acc enter data copyin(tau)
-      !$omp target enter data map(to:tau)
-      !$acc enter data create(optical_props%tau)
-      !$omp target enter data map(alloc:optical_props%tau)
-      call reorder123x321(tau, optical_props%tau)
-      select type(optical_props)
-        type is (ty_optical_props_2str)
-          !$acc enter data create(optical_props%ssa, optical_props%g)
-          !!$omp target enter data map(alloc:optical_props%ssa, optical_props%g) ! Not needed with Cray compiler
-          call zero_array(     ncol,nlay,ngpt,optical_props%ssa)
-          call zero_array(     ncol,nlay,ngpt,optical_props%g  )
-          !$acc exit data copyout(optical_props%ssa, optical_props%g)
-          !!$omp target exit data map(from:optical_props%ssa, optical_props%g) ! Not needed with Cray compiler
-        type is (ty_optical_props_nstr) ! We ought to be able to combine this with above
-          nmom = size(optical_props%p, 1)
-          !$acc enter data create(optical_props%ssa, optical_props%p)
-          !$omp target enter data map(alloc:optical_props%ssa, optical_props%p)
-          call zero_array(     ncol,nlay,ngpt,optical_props%ssa)
-          call zero_array(nmom,ncol,nlay,ngpt,optical_props%p  )
-          !$acc exit data copyout(optical_props%ssa, optical_props%p)
-          !$omp target exit data map(from:optical_props%ssa, optical_props%p)
-        end select
-      !$acc exit data copyout(optical_props%tau)
-      !$omp target exit data map(from:optical_props%tau)
-      !$acc exit data delete(tau)
-      !$omp target exit data map(release:tau)
-    else
-      ! combine optical depth and rayleigh scattering
-      !$acc enter data copyin(tau, tau_rayleigh)
-      !$omp target enter data map(to:tau, tau_rayleigh)
-      select type(optical_props)
-        type is (ty_optical_props_1scl)
-          ! User is asking for absorption optical depth
-          !$acc enter data create(optical_props%tau)
-          !$omp target enter data map(alloc:optical_props%tau)
-          call reorder123x321(tau, optical_props%tau)
-          !$acc exit data copyout(optical_props%tau)
-          !$omp target exit data map(from:optical_props%tau)
-        type is (ty_optical_props_2str)
-          !$acc enter data create(optical_props%tau, optical_props%ssa, optical_props%g)
-          !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%g)
-          call combine_and_reorder_2str(ncol, nlay, ngpt,       tau, tau_rayleigh, &
-                                        optical_props%tau, optical_props%ssa, optical_props%g)
-          !$acc exit data copyout(optical_props%tau, optical_props%ssa, optical_props%g)
-          !$omp target exit data map(from:optical_props%tau, optical_props%ssa, optical_props%g)
-        type is (ty_optical_props_nstr) ! We ought to be able to combine this with above
-          nmom = size(optical_props%p, 1)
-          !$acc enter data create(optical_props%tau, optical_props%ssa, optical_props%p)
-          !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%p)
-          call combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau, tau_rayleigh, &
-                                        optical_props%tau, optical_props%ssa, optical_props%p)
-          !$acc exit data copyout(optical_props%tau, optical_props%ssa, optical_props%p)
-          !$omp target exit data map(from:optical_props%tau, optical_props%ssa, optical_props%p)
+    ngpt = size(tau, 3)
+    select type(optical_props)
+      type is (ty_optical_props_1scl)
+        !
+        ! Extinction optical depth
+        !
+        !$acc parallel loop gang vector                collapse(3)
+        !$omp target teams distribute parallel do simd collapse(3)
+        do igpt = 1, ngpt
+          do ilay = 1, nlay
+            do icol = 1, ncol
+              optical_props%tau(icol,ilay,igpt) = tau(icol,ilay,igpt) + &
+                                         tau_rayleigh(icol,ilay,igpt)
+            end do
+          end do
+        end do
+      !
+      ! asymmetry factor or phase function moments
+      !
+      type is (ty_optical_props_2str)
+        !
+        ! Extinction optical depth and single scattering albedo
+        !
+        !$acc parallel loop gang vector                collapse(3)
+        !$omp target teams distribute parallel do simd collapse(3)
+        do igpt = 1, ngpt
+          do ilay = 1, nlay
+            do icol = 1, ncol
+              t = tau(icol,ilay,igpt) + tau_rayleigh(icol,ilay,igpt)
+              if(t > 2._wp * tiny(t)) then
+                 optical_props%ssa(icol,ilay,igpt) = tau_rayleigh(icol,ilay,igpt) / t
+               else
+                 optical_props%ssa(icol,ilay,igpt) = 0._wp
+               end if
+               optical_props%tau(icol,ilay,igpt) = t
+             end do
+          end do
+        end do
+        call zero_array(ncol, nlay, ngpt, optical_props%g)
+      type is (ty_optical_props_nstr)
+        !
+        ! Extinction optical depth and single scattering albedo
+        !
+        !$acc parallel loop gang vector                collapse(3)
+        !$omp target teams distribute parallel do simd collapse(3)
+        do igpt = 1, ngpt
+          do ilay = 1, nlay
+            do icol = 1, ncol
+              t = tau(icol,ilay,igpt) + tau_rayleigh(icol,ilay,igpt)
+              if(t > 2._wp * tiny(t)) then
+                 optical_props%ssa(icol,ilay,igpt) = tau_rayleigh(icol,ilay,igpt) / t
+               else
+                 optical_props%ssa(icol,ilay,igpt) = 0._wp
+               end if
+               optical_props%tau(icol,ilay,igpt) = t
+             end do
+          end do
+        end do
+        nmom = size(optical_props%p, 1)
+        call zero_array(nmom, ncol, nlay, ngpt, optical_props%p)
+        if(nmom >= 2) then
+          !$acc parallel loop gang vector                collapse(3)
+          !$omp target teams distribute parallel do simd collapse(3)
+          do igpt = 1, ngpt
+            do ilay = 1, nlay
+              do icol = 1, ncol
+                optical_props%p(2,icol,ilay,igpt) = 0.1_wp
+              end do
+            end do
+          end do
+        end if
       end select
-      !$acc exit data delete(tau, tau_rayleigh)
-      !$omp target exit data map(release:tau, tau_rayleigh)
-    end if
-    !$acc exit data copyout(optical_props)
-  end subroutine combine_and_reorder
+  end subroutine combine_abs_and_rayleigh
 
   !--------------------------------------------------------------------------------------------------------------------
   ! Sizes of tables: pressure, temperate, eta (mixing fraction)
@@ -1947,7 +1979,7 @@ pure function get_ntemp(this)
     class(ty_gas_optics_rrtmgp), intent(in) :: this
     integer                          :: get_ntemp
 
-    get_ntemp = size(this%kmajor,dim=4)
+    get_ntemp = size(this%kmajor,dim=1)
   end function get_ntemp
   ! --------------------------------------------------------------------------------------
   !
diff --git a/tests/clear_sky_regression.F90 b/tests/clear_sky_regression.F90
index a9fac4516..8f08d916c 100644
--- a/tests/clear_sky_regression.F90
+++ b/tests/clear_sky_regression.F90
@@ -141,7 +141,7 @@ program rte_clear_sky_regression
   is_lw = .not. is_sw
   print *, "k-distribution is for the " // merge("longwave ", "shortwave", is_lw)
   print *, "  pressure    limits (Pa):", k_dist%get_press_min(), k_dist%get_press_max()
-  print *, "  temperature limits (Pa):", k_dist%get_temp_min(),  k_dist%get_temp_max()
+  print *, "  temperature limits (K):", k_dist%get_temp_min(),  k_dist%get_temp_max()
   !
   ! Problem sizes
   !
@@ -218,6 +218,7 @@ program rte_clear_sky_regression
       call load_and_init(k_dist_2, k_dist_file_2, gas_concs)
       print *, "Alternate k-distribution is for the " // merge("longwave ", "shortwave", .not. k_dist_2%source_is_external())
       print *, "  Resolution :", k_dist_2%get_nband(), k_dist_2%get_ngpt()
+      ngpt = k_dist_2%get_ngpt()
       call atmos%finalize()
       call make_optical_props_1scl(k_dist_2)
       call stop_on_err(lw_sources%alloc(ncol, nlay, k_dist_2))
@@ -566,6 +567,7 @@ end subroutine lw_clear_sky_incr
   subroutine lw_clear_sky_alt
     real(wp), dimension(ncol, nlay+1), target :: flux_net
     real(wp), dimension(ncol, nlay)           :: heating_rate
+    real(wp), dimension(ncol, ngpt)           :: lw_Ds
 
     fluxes%flux_net => flux_net
     call stop_on_err(k_dist_2%gas_optics(p_lay, p_lev, &
@@ -581,10 +583,21 @@ subroutine lw_clear_sky_alt
     call write_broadband_field(input_file, flux_up,  "lw_flux_up_alt",  "LW flux up, fewer g-points")
     call write_broadband_field(input_file, flux_dn,  "lw_flux_dn_alt",  "LW flux dn, fewer g-points")
     call write_broadband_field(input_file, flux_net, "lw_flux_net_alt", "LW flux ne, fewer g-pointst")
-
     call stop_on_err(compute_heating_rate(flux_up, flux_dn, p_lev, heating_rate))
     call write_broadband_field(input_file, heating_rate,  &
                                                      "lw_flux_hr_alt",  "LW heating rate, fewer g-points", vert_dim_name = "layer")
+
+    call stop_on_err(k_dist_2%compute_optimal_angles(atmos, lw_Ds))
+    call stop_on_err(rte_lw(atmos, top_at_1, &
+                            lw_sources,      &
+                            sfc_emis,        &
+                            fluxes, lw_Ds=lw_Ds))
+    call write_broadband_field(input_file, flux_up,  "lw_flux_up_alt_oa",  "LW flux up, fewer g-points, opt. angle")
+    call write_broadband_field(input_file, flux_dn,  "lw_flux_dn_alt_oa",  "LW flux dn, fewer g-points, opt. angle")
+    call write_broadband_field(input_file, flux_net, "lw_flux_net_alt_oa", "LW flux ne, fewer g-points, opt. angle")
+    call stop_on_err(compute_heating_rate(flux_up, flux_dn, p_lev, heating_rate))
+    call write_broadband_field(input_file, heating_rate,  &
+                                                     "lw_flux_hr_alt_oa",  "LW heating rate, fewer g-points, opt. angle", vert_dim_name = "layer")
     call k_dist_2%finalize()
   end subroutine lw_clear_sky_alt
   ! ----------------------------------------------------------------------------
diff --git a/tests/validation-plots.py b/tests/validation-plots.py
index 71389fd43..ebbd9274f 100644
--- a/tests/validation-plots.py
+++ b/tests/validation-plots.py
@@ -81,18 +81,20 @@ def construct_lbl_esgf_name(var, esgf_node="llnl"):
         #
         # Accuracy - 3-angle and single-angle
         #
-        variants = [[gpi.lw_flux_dn, gpi.lw_flux_dn_alt, gpi.lw_flux_dn_optang, gpi.lw_flux_dn_3ang, gpi.lw_flux_dn_2str],
-                    [gpi.lw_flux_up, gpi.lw_flux_up_alt, gpi.lw_flux_up_optang, gpi.lw_flux_up_3ang, gpi.lw_flux_up_2str],
+        variants = [[gpi.lw_flux_dn, gpi.lw_flux_dn_alt, gpi.lw_flux_dn_optang, gpi.lw_flux_dn_alt_oa, gpi.lw_flux_dn_3ang, gpi.lw_flux_dn_2str, gpi.lw_flux_dn_1rescl],
+                    [gpi.lw_flux_up, gpi.lw_flux_up_alt, gpi.lw_flux_up_optang, gpi.lw_flux_up_alt_oa, gpi.lw_flux_up_3ang, gpi.lw_flux_up_2str, gpi.lw_flux_up_1rescl],
                     [gpi.lw_flux_net,
                     gpi.lw_flux_net_alt,
                     gpi.lw_flux_dn_optang - gpi.lw_flux_up_optang,
+                    gpi.lw_flux_dn_alt_oa - gpi.lw_flux_up_alt_oa,
                     gpi.lw_flux_dn_3ang   - gpi.lw_flux_up_3ang,
-                    gpi.lw_flux_dn_2str   - gpi.lw_flux_up_2str]]
+                    gpi.lw_flux_dn_2str   - gpi.lw_flux_up_2str,
+                    gpi.lw_flux_dn_1rescl - gpi.lw_flux_up_1rescl]]
         refs = [lbli.rld,  lbli.rlu,  lbli.rld -  lbli.rlu]
         titles = ["Accuracy wrt LBLRTM: LW down", "Accuracy wrt LBLRTM: LW up", "Accuracy: LW net"]
         for v, r, t in zip(variants, refs, titles):
             make_comparison_plot(v, \
-                                 labels = ["default","fewer-g-points", "optimal-angle", "3-angle", "2-stream"], \
+                                 labels = ["default","fewer-g-points", "optimal-angle", "fewer points + optimal-angle", "3-angle", "2-stream", "rescaled"], \
                                  reference = r,            \
                                  vscale = plev/100.)
             plt.ylabel("Pressure (Pa)")