From feb8e1567aa75a30a5d570d7de18f54d448e9835 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Tue, 8 Apr 2014 10:21:20 -0400 Subject: [PATCH 01/14] make visc_bound_rem to be private to solve slowdown issue --- src/parameterizations/lateral/MOM_hor_visc.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parameterizations/lateral/MOM_hor_visc.F90 b/src/parameterizations/lateral/MOM_hor_visc.F90 index 28f67244d2..e00cfa801b 100644 --- a/src/parameterizations/lateral/MOM_hor_visc.F90 +++ b/src/parameterizations/lateral/MOM_hor_visc.F90 @@ -296,7 +296,7 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, CS, OBC) "VarMix%Res_fn_q both need to be associated with Resoln_scaled_Kh.") endif -!$OMP parallel do default(shared) private(i, j, k, u0, v0, sh_xx, str_xx, & +!$OMP parallel do default(shared) private(i, j, k, u0, v0, sh_xx, str_xx, visc_bound_rem,& !$OMP sh_xy, str_xy, Ah, Kh, AhSm, KhSm, & !$OMP Shear_mag, huq, hvq, hq, Kh_scale, hrat_min) do k=1,nz From 5c741f6bc07be589177f89b087ea9332a9311bb0 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Tue, 8 Apr 2014 10:22:43 -0400 Subject: [PATCH 02/14] implement openmp --- src/tracer/MOM_tracer_advect.F90 | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/tracer/MOM_tracer_advect.F90 b/src/tracer/MOM_tracer_advect.F90 index c5c1d8ef47..b95d7dc311 100644 --- a/src/tracer/MOM_tracer_advect.F90 +++ b/src/tracer/MOM_tracer_advect.F90 @@ -136,9 +136,11 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, CS, Reg) integer :: nsten_halo ! The number of stensils that fit in the halos. integer :: i, j, k, m, is, ie, js, je, isd, ied, jsd, jed, nz, itt, ntr, do_any integer :: isv, iev, jsv, jev ! The valid range of the indices. + integer :: IsdB, IedB, JsdB, JedB is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = G%ke isd = G%isd ; ied = G%ied ; jsd = G%jsd ; jed = G%jed + IsdB = G%IsdB ; IedB = G%IedB ; JsdB = G%JsdB ; JedB = G%JedB landvolfill = 1.0e-20 ! This is arbitrary, but must be positive. stensil = 2 ! The scheme's stensil; 2 for PLM. @@ -156,12 +158,15 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, CS, Reg) max_iter = 2*INT(CEILING(dt/CS%dt)) + 1 +!$OMP parallel default(shared) + ! This initializes the halos of uhr and vhr because pass_vector might do ! calculations on them, even though they are never used. - uhr(:,:,:) = 0.0 ; vhr(:,:,:) = 0.0 - hprev(:,:,:) = landvolfill - - do k=1,nz +!$OMP do + do k = 1, nz + do j = jsd, jed; do i = IsdB, IedB; uhr(i,j,k) = 0.0; enddo ; enddo + do j = jsdB, jedB; do i = Isd, Ied; vhr(i,j,k) = 0.0; enddo ; enddo + do j = jsd, jed; do i = Isd, Ied; hprev(i,j,k) = 0.0; enddo ; enddo domore_k(k)=1 ! Put the remaining (total) thickness fluxes into uhr and vhr. do j=js,je ; do I=is-1,ie ; uhr(I,j,k) = uhtr(I,j,k) ; enddo ; enddo @@ -179,13 +184,16 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, CS, Reg) max(0.0, 1.0e-13*hprev(i,j,k) - G%areaT(i,j)*h_end(i,j,k)) enddo ; enddo enddo + +!$OMP do do j=jsd,jed ; do I=isd,ied-1 uh_neglect(I,j) = G%H_subroundoff*MIN(G%areaT(i,j),G%areaT(i+1,j)) enddo ; enddo +!$OMP do do J=jsd,jed-1 ; do i=isd,ied vh_neglect(i,J) = G%H_subroundoff*MIN(G%areaT(i,j),G%areaT(i,j+1)) enddo ; enddo - +!$OMP do do m=1,ntr if (associated(Tr(m)%ad_x)) then do k=1,nz ; do j=jsd,jed ; do i=isd,ied @@ -204,6 +212,7 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, CS, Reg) do J=jsd,jed ; do i=isd,ied ; Tr(m)%ad2d_y(i,J) = 0.0 ; enddo ; enddo endif enddo +!$OMP end parallel isv = is ; iev = ie ; jsv = js ; jev = je @@ -222,6 +231,7 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, CS, Reg) ! Reevaluate domore_u & domore_v unless the valid range is the same size as ! before. Also, do this if there is Strang splitting. if ((nsten_halo > 1) .or. (itt==1)) then +!$OMP parallel do default(shared) do k=1,nz ; if (domore_k(k) > 0) then do j=jsv,jev ; if (.not.domore_u(j,k)) then do i=isv+stensil-1,iev-stensil; if (uhr(I,j,k) /= 0.0) then @@ -248,6 +258,7 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, CS, Reg) isv = isv + stensil ; iev = iev - stensil jsv = jsv + stensil ; jev = jev - stensil +!$OMP parallel do default(shared) do k=1,nz ; if (domore_k(k) > 0) then ! To ensure positive definiteness of the thickness at each iteration, the ! mass fluxes out of each layer are checked each step, and limited to keep From b70a603139b1a5742e09c240244a1d89e1a55953 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Tue, 8 Apr 2014 10:23:44 -0400 Subject: [PATCH 03/14] implement openmp --- src/tracer/MOM_tracer_registry.F90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tracer/MOM_tracer_registry.F90 b/src/tracer/MOM_tracer_registry.F90 index 0edef9262e..8faa2ad779 100644 --- a/src/tracer/MOM_tracer_registry.F90 +++ b/src/tracer/MOM_tracer_registry.F90 @@ -308,6 +308,7 @@ subroutine tracer_vertdiff(h_old, ea, eb, dt, tr, G, & if (present(sink_rate)) then +!$OMP parallel do default(shared) private(sink,h_minus_dsink,b_denom_1,b1,d1,h_tr,c1) do j=js,je ! Find the sinking rates at all interfaces, limiting them if necesary ! so that the characteristics do not cross within a timestep. @@ -375,6 +376,7 @@ subroutine tracer_vertdiff(h_old, ea, eb, dt, tr, G, & endif ; enddo ; enddo enddo else +!$OMP parallel do default(shared) private(h_tr,b_denom_1,b1,d1,c1) do j=js,je do i=is,ie ; if (G%mask2dT(i,j) > 0.5) then h_tr = h_old(i,j,1) + h_neglect From afd1b07911940a59e9e964e494ef070e30accab2 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Wed, 9 Apr 2014 10:32:16 -0400 Subject: [PATCH 04/14] implement openmp --- .../vertical/MOM_geothermal.F90 | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/parameterizations/vertical/MOM_geothermal.F90 b/src/parameterizations/vertical/MOM_geothermal.F90 index d8631aae36..5995d70b7c 100644 --- a/src/parameterizations/vertical/MOM_geothermal.F90 +++ b/src/parameterizations/vertical/MOM_geothermal.F90 @@ -115,7 +115,7 @@ subroutine geothermal(h, tv, dt, ea, eb, G, CS) ! (in) CS - The control structure returned by a previous call to ! geothermal_init. - real :: resid(SZI_(G),SZJ_(G)) +! real :: resid(SZI_(G),SZJ_(G)) !z1l: never been used. real, dimension(SZI_(G)) :: & heat_rem, & ! The remaining heat, in H degC. h_geo_rem, &! The remaining thickness to which to apply the geothermal @@ -173,11 +173,12 @@ subroutine geothermal(h, tv, dt, ea, eb, G, CS) if (.not.ASSOCIATED(tv%T)) call MOM_error(FATAL, "MOM geothermal: "//& "Geothermal heating can only be applied if T & S are state variables.") - do i=is,ie ; do j=js,je - resid(i,j) = tv%internal_heat(i,j) - enddo ; enddo - +! do i=is,ie ; do j=js,je +! resid(i,j) = tv%internal_heat(i,j) +! enddo ; enddo +!$OMP parallel do default(private) shared(is,ie,js,je,G,CS,dt,Irho_cp,nkmb,tv,p_Ref, & +!$OMP h,Angstrom,nz,H_neglect,eb) do j=js,je ! 1. Only work on columns that are being heated. ! 2. Find the deepest layer with any mass. @@ -345,10 +346,10 @@ subroutine geothermal(h, tv, dt, ea, eb, G, CS) enddo ; endif enddo ! j-loop - do i=is,ie ; do j=js,je - resid(i,j) = tv%internal_heat(i,j) - resid(i,j) - G%H_to_kg_m2 * & - (G%mask2dT(i,j) * (CS%geo_heat(i,j) * (dt*Irho_cp))) - enddo ; enddo +! do i=is,ie ; do j=js,je +! resid(i,j) = tv%internal_heat(i,j) - resid(i,j) - G%H_to_kg_m2 * & +! (G%mask2dT(i,j) * (CS%geo_heat(i,j) * (dt*Irho_cp))) +! enddo ; enddo end subroutine geothermal From d93cacdeb59e55b78ec2bb0807f474cefd6756e6 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Wed, 9 Apr 2014 10:44:35 -0400 Subject: [PATCH 05/14] change loop structure and implement openmp --- src/tracer/MOM_tracer_hor_diff.F90 | 120 ++++++++++++++++++----------- 1 file changed, 76 insertions(+), 44 deletions(-) diff --git a/src/tracer/MOM_tracer_hor_diff.F90 b/src/tracer/MOM_tracer_hor_diff.F90 index bd08f694dc..d4ea69ea4a 100644 --- a/src/tracer/MOM_tracer_hor_diff.F90 +++ b/src/tracer/MOM_tracer_hor_diff.F90 @@ -445,6 +445,7 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & real, dimension(SZI_(G), SZJ_(G), SZK_(G)) :: & tr_flux_conv ! The flux convergence of tracers, in TR m3 or TR kg. + real, dimension(SZI_(G), SZJ_(G), SZK_(G)) :: Tr_flux_3d, Tr_adj_vert_L, Tr_adj_vert_R real, dimension(SZI_(G), SZK_(G), SZJ_(G)) :: & rho_srt, & ! The density of each layer of the sorted columns, in kg m-3. @@ -526,12 +527,14 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & enddo call pass_var(Tr(ntr)%t(:,:,:), G%Domain) call cpu_clock_end(id_clock_pass) - ! Determine which layers the mixed- and buffer-layers map into... - do k=1,nkmb ; do j=js-2,je+2 - call calculate_density(tv%T(:,j,k),tv%S(:,j,k), p_ref_cv, & +!$OMP parallel do default(shared) + do k=1,nkmb + do j=js-2,je+2 + call calculate_density(tv%T(:,j,k),tv%S(:,j,k), p_ref_cv, & rho_coord(:,j,k), is-2, ie-is+5, tv%eqn_of_state) - enddo ; enddo + enddo + enddo do j=js-2,je+2 ; do i=is-2,ie+2 Rml_max(i,j) = rho_coord(i,j,1) @@ -540,10 +543,10 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & do k=2,nkmb ; do j=js-2,je+2 ; do i=is-2,ie+2 if (Rml_max(i,j) < rho_coord(i,j,k)) Rml_max(i,j) = rho_coord(i,j,k) enddo ; enddo ; enddo - ! Use bracketing and bisection to find the k-level that the densest of the ! mixed and buffer layer corresponds to, such that: ! G%Rlay(max_kRho-1) < Rml_max <= G%Rlay(max_kRho) +!$OMP parallel do default(shared) private(k_min,k_max,k_test) do j=js-2,je+2 ; do i=is-2,ie+2 ; if (G%mask2dT(i,j) > 0.5) then if (Rml_max(i,j) > G%Rlay(nz)) then ; max_kRho(i,j) = nz+1 elseif (Rml_max(i,j) <= G%Rlay(nkmb+1)) then ; max_kRho(i,j) = nkmb+1 @@ -569,27 +572,30 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & if (PEmax_kRho > nz) PEmax_kRho = nz ! PEmax_kRho could have been nz+1. h_exclude = 10.0*(G%Angstrom + G%H_subroundoff) - - do k=1,nkmb ; do j=js-1,je+1 ; do i=is-1,ie+1 ; if (G%mask2dT(i,j) > 0.5) then - if (h(i,j,k) > h_exclude) then - num_srt(i,j) = num_srt(i,j) + 1 ; ns = num_srt(i,j) - k0_srt(i,ns,j) = k - rho_srt(i,ns,j) = rho_coord(i,j,k) - h_srt(i,ns,j) = h(i,j,k) - endif - endif ; enddo ; enddo ; enddo - do k=nkmb+1,PEmax_kRho ; do j=js-1,je+1 ; do i=is-1,ie+1 ; if (G%mask2dT(i,j) > 0.5) then - if ((k<=k_end_srt(i,j)) .and. (h(i,j,k) > h_exclude)) then - num_srt(i,j) = num_srt(i,j) + 1 ; ns = num_srt(i,j) - k0_srt(i,ns,j) = k - rho_srt(i,ns,j) = G%Rlay(k) - h_srt(i,ns,j) = h(i,j,k) - endif - endif ; enddo ; enddo ; enddo - +!$OMP parallel default(shared) private(ns,tmp,itmp) +!$OMP do + do j=js-1,je+1 + do k=1,nkmb ; do i=is-1,ie+1 ; if (G%mask2dT(i,j) > 0.5) then + if (h(i,j,k) > h_exclude) then + num_srt(i,j) = num_srt(i,j) + 1 ; ns = num_srt(i,j) + k0_srt(i,ns,j) = k + rho_srt(i,ns,j) = rho_coord(i,j,k) + h_srt(i,ns,j) = h(i,j,k) + endif + endif ; enddo ; enddo + do k=nkmb+1,PEmax_kRho ; do i=is-1,ie+1 ; if (G%mask2dT(i,j) > 0.5) then + if ((k<=k_end_srt(i,j)) .and. (h(i,j,k) > h_exclude)) then + num_srt(i,j) = num_srt(i,j) + 1 ; ns = num_srt(i,j) + k0_srt(i,ns,j) = k + rho_srt(i,ns,j) = G%Rlay(k) + h_srt(i,ns,j) = h(i,j,k) + endif + endif ; enddo ; enddo + enddo ! Sort each column by increasing density. This should already be close, ! and the size of the arrays are small, so straight insertion is used. - do j=js-1,je+1 ; do i=is-1,ie+1 +!$OMP do + do j=js-1,je+1; do i=is-1,ie+1 do k=2,num_srt(i,j) ; if (rho_srt(i,k,j) < rho_srt(i,k-1,j)) then ! The last segment needs to be shuffled earlier in the list. do k2 = k,2,-1 ; if (rho_srt(i,k2,j) >= rho_srt(i,k2-1,j)) exit @@ -598,12 +604,13 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & tmp = h_srt(i,k2-1,j) ; h_srt(i,k2-1,j) = h_srt(i,k2,j) ; h_srt(i,k2,j) = tmp enddo endif ; enddo - enddo ; enddo - + enddo; enddo +!$OMP do do j=js-1,je+1 max_srt(j) = 0 do i=is-1,ie+1 ; max_srt(j) = max(max_srt(j), num_srt(i,j)) ; enddo enddo +!$OMP end parallel do j=js,je k_size = max(2*max_srt(j),1) @@ -617,6 +624,9 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & allocate(k0b_Ru(j)%p(IsdB:IedB,k_size)) enddo +!$OMP parallel do default(private) shared(is,ie,js,je,G,num_srt,rho_srt,k0b_Lu,k0_srt, & +!$OMP k0b_Ru,k0a_Lu,k0a_Ru,deep_wt_Lu,deep_wt_Ru, & +!$OMP h_srt,nkmb,nPu,hP_Lu,hP_Ru) do j=js,je ; do I=is-1,ie ; if (G%mask2dCu(I,j) > 0.5) then ! Set up the pairings for fluxes through the zonal faces. @@ -763,6 +773,9 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & allocate(k0b_Rv(J)%p(isd:ied,k_size)) enddo +!$OMP parallel do default(private) shared(is,ie,js,je,G,num_srt,rho_srt,k0b_Lv,k0b_Rv, & +!$OMP k0_srt,k0a_Lv,k0a_Rv,deep_wt_Lv,deep_wt_Rv, & +!$OMP h_srt,nkmb,nPv,hP_Lv,hP_Rv) do J=js-1,je ; do i=is,ie ; if (G%mask2dCv(i,J) > 0.5) then ! Set up the pairings for fluxes through the meridional faces. @@ -917,7 +930,9 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & endif do m=1,ntr - +!$OMP parallel do default(shared) private(Tr_min_face,Tr_max_face,kLa,kLb,kRa,kRb,Tr_La, & +!$OMP Tr_Lb,Tr_Ra,Tr_Rb,Tr_av_L,wt_b,Tr_av_R,h_L,h_R, & +!$OMP Tr_flux,Tr_adj_vert,wt_a,vol) do j=js,je ; do I=is-1,ie ; if (G%mask2dCu(I,j) > 0.5) then ! Determine the fluxes through the zonal faces. @@ -1051,6 +1066,9 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & enddo ! Loop over pairings at faces. endif ; enddo ; enddo ! i- & j- loops over zonal faces. +!$OMP parallel do default(shared) private(Tr_min_face,Tr_max_face,kLa,kLb,kRa,kRb, & +!$OMP Tr_La,Tr_Lb,Tr_Ra,Tr_Rb,Tr_av_L,wt_b,Tr_av_R, & +!$OMP h_L,h_R,Tr_flux,Tr_adj_vert,wt_a,vol) do J=js-1,je ; do i=is,ie ; if (G%mask2dCv(i,J) > 0.5) then ! Determine the fluxes through the meridional faces. @@ -1105,10 +1123,9 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & h_L = hP_Lv(J)%p(i,k) ; h_R = hP_Rv(J)%p(i,k) Tr_flux = I_maxitt * ((2.0 * h_L * h_R) / (h_L + h_R)) * & khdt_epi_y(i,J) * (Tr_av_L - Tr_av_R) - - if (deep_wt_Lv(J)%p(i,k) >= 1.0) then - tr_flux_conv(i,j,kLb) = tr_flux_conv(i,j,kLb) - Tr_flux - else + Tr_flux_3d(i,j,k) = Tr_flux + + if (deep_wt_Lv(J)%p(i,k) < 1.0) then Tr_adj_vert = 0.0 wt_b = deep_wt_Lv(J)%p(i,k) ; wt_a = 1.0 - wt_b vol = hP_Lv(J)%p(i,k) * G%areaT(i,j) @@ -1132,14 +1149,10 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & (vol*wt_a)*(Tr_Lb - Tr_La)) endif endif - - tr_flux_conv(i,j,kLa) = tr_flux_conv(i,j,kLa) - (wt_a*Tr_flux + Tr_adj_vert) - tr_flux_conv(i,j,kLb) = tr_flux_conv(i,j,kLb) - (wt_b*Tr_flux - Tr_adj_vert) + Tr_adj_vert_L(i,j,k) = Tr_adj_vert endif - if (deep_wt_Rv(J)%p(i,k) >= 1.0) then - tr_flux_conv(i,j+1,kRb) = tr_flux_conv(i,j+1,kRb) + Tr_flux - else + if (deep_wt_Rv(J)%p(i,k) < 1.0) then Tr_adj_vert = 0.0 wt_b = deep_wt_Rv(J)%p(i,k) ; wt_a = 1.0 - wt_b vol = hP_Rv(J)%p(i,k) * G%areaT(i,j+1) @@ -1163,18 +1176,37 @@ subroutine tracer_epipycnal_ML_diff(h, dt, Tr, ntr, khdt_epi_x, khdt_epi_y, G, & (vol*wt_a)*(Tr_Rb - Tr_Ra)) endif endif - - tr_flux_conv(i,j+1,kRa) = tr_flux_conv(i,j+1,kRa) + & - (wt_a*Tr_flux - Tr_adj_vert) - tr_flux_conv(i,j+1,kRb) = tr_flux_conv(i,j+1,kRb) + & - (wt_b*Tr_flux + Tr_adj_vert) + Tr_adj_vert_R(i,j,k) = Tr_adj_vert endif if (associated(Tr(m)%df2d_y)) & Tr(m)%df2d_y(i,J) = Tr(m)%df2d_y(i,J) + Tr_flux * Idt enddo ! Loop over pairings at faces. endif ; enddo ; enddo ! i- & j- loops over meridional faces. - - +!$OMP parallel do default(shared) private(kLa,kLb,kRa,kRb,wt_b,wt_a) + do i=is,ie ; do J=js-1,je ; if (G%mask2dCv(i,J) > 0.5) then + do k=1,nPv(i,J) + kLb = k0b_Lv(J)%p(i,k); kRb = k0b_Rv(J)%p(i,k) + if (deep_wt_Lv(J)%p(i,k) >= 1.0) then + tr_flux_conv(i,j,kLb) = tr_flux_conv(i,j,kLb) - Tr_flux_3d(i,j,k) + else + kLa = k0a_Lv(J)%p(i,k) + wt_b = deep_wt_Lv(J)%p(i,k) ; wt_a = 1.0 - wt_b + tr_flux_conv(i,j,kLa) = tr_flux_conv(i,j,kLa) - (wt_a*Tr_flux_3d(i,j,k) + Tr_adj_vert_L(i,j,k)) + tr_flux_conv(i,j,kLb) = tr_flux_conv(i,j,kLb) - (wt_b*Tr_flux_3d(i,j,k) - Tr_adj_vert_L(i,j,k)) + endif + if (deep_wt_Rv(J)%p(i,k) >= 1.0) then + tr_flux_conv(i,j+1,kRb) = tr_flux_conv(i,j+1,kRb) + tr_flux_3d(i,j,k) + else + kRa = k0a_Rv(J)%p(i,k) + wt_b = deep_wt_Rv(J)%p(i,k) ; wt_a = 1.0 - wt_b + tr_flux_conv(i,j+1,kRa) = tr_flux_conv(i,j+1,kRa) + & + (wt_a*Tr_flux_3d(i,j,k) - Tr_adj_vert_R(i,j,k)) + tr_flux_conv(i,j+1,kRb) = tr_flux_conv(i,j+1,kRb) + & + (wt_b*Tr_flux_3d(i,j,k) + Tr_adj_vert_R(i,j,k)) + endif + enddo + endif ; enddo ; enddo +!$OMP parallel do default(shared) do k=1,PEmax_kRho ; do j=js,je ; do i=is,ie if ((G%mask2dT(i,j) > 0.5) .and. (h(i,j,k) > 0.0)) then Tr(m)%t(i,j,k) = Tr(m)%t(i,j,k) + tr_flux_conv(i,j,k) / & From 287ac5335a4fc628014fd9029e2facbbae05a0b2 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Wed, 9 Apr 2014 11:00:55 -0400 Subject: [PATCH 06/14] rearrange loop structure and implement openmp --- .../lateral/MOM_thickness_diffuse.F90 | 180 ++++++++++-------- 1 file changed, 100 insertions(+), 80 deletions(-) diff --git a/src/parameterizations/lateral/MOM_thickness_diffuse.F90 b/src/parameterizations/lateral/MOM_thickness_diffuse.F90 index daa11a939b..513fe441d4 100644 --- a/src/parameterizations/lateral/MOM_thickness_diffuse.F90 +++ b/src/parameterizations/lateral/MOM_thickness_diffuse.F90 @@ -408,10 +408,10 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & real, dimension(SZI_(G), SZJ_(G), SZK_(G)+1) :: & pres, & ! The pressure at an interface, in Pa. h_avail_rsum ! The running sum of h_avail above an interface, in m3 s-1. - real, dimension(SZIB_(G), SZJ_(G)) :: & + real, dimension(SZIB_(G)) :: & drho_dT_u, & ! The derivatives of density with temperature and drho_dS_u ! salinity at u points, in kg m-3 K-1 and kg m-3 psu-1. - real, dimension(SZI_(G), SZJB_(G)) :: & + real, dimension(SZI_(G)) :: & drho_dT_v, & ! The derivatives of density with temperature and drho_dS_v ! salinity at v points, in kg m-3 K-1 and kg m-3 psu-1. real :: uhtot(SZIB_(G), SZJ_(G)) ! The vertical sum of uhD, in m3 s-1. @@ -464,7 +464,7 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & ! Diagnostics that should be eliminated altogether later... ! real, dimension(SZIB_(G), SZJ_(G), SZK_(G)+1) :: sfn_x, sfn_slope_x ! real, dimension(SZI_(G), SZJB_(G), SZK_(G)+1) :: sfn_y, sfn_slope_y - logical :: MEKE_not_null + logical :: MEKE_not_null, present_int_slope_u, present_int_slope_v integer :: is, ie, js, je, nz, IsdB integer :: i, j, k is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = G%ke ; IsdB = G%IsdB @@ -477,6 +477,8 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & use_EOS = associated(tv%eqn_of_state) MEKE_not_null = (LOC(MEKE) .NE. 0) + present_int_slope_u = PRESENT(int_slope_u) + present_int_slope_u = PRESENT(int_slope_v) nk_linear = max(G%nkml, 1) @@ -524,40 +526,42 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & enddo ; enddo !$OMP end parallel -!!$OMP parallel do default(none) - do K=nz,2,-1 +!$OMP parallel do default(private) shared(nz,is,ie,js,je,find_work,use_EOS,G,pres,T,S, & +!$OMP nk_linear,IsdB,tv,h,h_neglect,e,dz_neglect, & +!$OMP I_slope_max2,h_neglect2,present_int_slope_u, & +!$OMP int_slope_u,KH_u,uhtot,h_frac,h_avail_rsum, & +!$OMP uhD,h_avail,G_scale,work_u) + do j = js,je ; do K=nz,2,-1 if (find_work .and. .not.(use_EOS)) then - drdiA = 0.0 ; drdiB = 0.0 ; drdjA = 0.0 ; drdjB = 0.0 + drdiA = 0.0 ; drdiB = 0.0 ! drdkL = G%g_prime(k) ; drdkR = G%g_prime(k) drdkL = G%Rlay(k)-G%Rlay(k-1) ; drdkR = G%Rlay(k)-G%Rlay(k-1) endif ! Calculate the zonal fluxes and gradients. if (use_EOS .and. ((k > nk_linear) .or. find_work)) then - do j=js,je - do I=is-1,ie - pres_u(I) = 0.5*(pres(i,j,K) + pres(i+1,j,K)) - T_u(I) = 0.25*((T(i,j,k) + T(i+1,j,k)) + (T(i,j,k-1) + T(i+1,j,k-1))) - S_u(I) = 0.25*((S(i,j,k) + S(i+1,j,k)) + (S(i,j,k-1) + S(i+1,j,k-1))) - enddo - call calculate_density_derivs(T_u, S_u, pres_u, drho_dT_u(:,j), & - drho_dS_u(:,j), (is-IsdB+1)-1, ie-is+2, tv%eqn_of_state) + do I=is-1,ie + pres_u(I) = 0.5*(pres(i,j,K) + pres(i+1,j,K)) + T_u(I) = 0.25*((T(i,j,k) + T(i+1,j,k)) + (T(i,j,k-1) + T(i+1,j,k-1))) + S_u(I) = 0.25*((S(i,j,k) + S(i+1,j,k)) + (S(i,j,k-1) + S(i+1,j,k-1))) enddo + call calculate_density_derivs(T_u, S_u, pres_u, drho_dT_u, & + drho_dS_u, (is-IsdB+1)-1, ie-is+2, tv%eqn_of_state) endif - do j=js,je ; do I=is-1,ie + do I=is-1,ie if (use_EOS .and. ((k > nk_linear) .or. find_work)) then ! Estimate the horizontal density gradients along layers. - drdiA = drho_dT_u(I,j) * (T(i+1,j,k-1)-T(i,j,k-1)) + & - drho_dS_u(I,j) * (S(i+1,j,k-1)-S(i,j,k-1)) - drdiB = drho_dT_u(I,j) * (T(i+1,j,k)-T(i,j,k)) + & - drho_dS_u(I,j) * (S(i+1,j,k)-S(i,j,k)) + drdiA = drho_dT_u(I) * (T(i+1,j,k-1)-T(i,j,k-1)) + & + drho_dS_u(I) * (S(i+1,j,k-1)-S(i,j,k-1)) + drdiB = drho_dT_u(I) * (T(i+1,j,k)-T(i,j,k)) + & + drho_dS_u(I) * (S(i+1,j,k)-S(i,j,k)) ! Estimate the vertical density gradients times the grid spacing. - drdkL = (drho_dT_u(I,j) * (T(i,j,k)-T(i,j,k-1)) + & - drho_dS_u(I,j) * (S(i,j,k)-S(i,j,k-1))) - drdkR = (drho_dT_u(I,j) * (T(i+1,j,k)-T(i+1,j,k-1)) + & - drho_dS_u(I,j) * (S(i+1,j,k)-S(i+1,j,k-1))) + drdkL = (drho_dT_u(I) * (T(i,j,k)-T(i,j,k-1)) + & + drho_dS_u(I) * (S(i,j,k)-S(i,j,k-1))) + drdkR = (drho_dT_u(I) * (T(i+1,j,k)-T(i+1,j,k-1)) + & + drho_dS_u(I) * (S(i+1,j,k)-S(i+1,j,k-1))) endif @@ -601,7 +605,7 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & slope2_Ratio = 1.0e20 ! Force the use of the safe streamfunction. endif - if (present(int_slope_u)) then + if (present_int_slope_u) then Slope = (1.0 - int_slope_u(I,j,K)) * Slope + & int_slope_u(I,j,K) * ((e(i+1,j,K)-e(i,j,K)) * G%IdxCu(I,j)) slope2_Ratio = (1.0 - int_slope_u(I,j,K)) * slope2_Ratio @@ -690,34 +694,45 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & (uhD(I,j,K) * drdiB) * 0.25 * & ((e(i,j,K) + e(i,j,K+1)) + (e(i+1,j,K) + e(i+1,j,K+1))) ) endif - - enddo ; enddo + + enddo + enddo ; enddo ! end of j-loop ! Calculate the meridional fluxes and gradients. +!$OMP parallel do default(private) shared(nz,is,ie,js,je,find_work,use_EOS,G,pres,T,S, & +!$OMP nk_linear,IsdB,tv,h,h_neglect,e,dz_neglect, & +!$OMP I_slope_max2,h_neglect2,present_int_slope_v, & +!$OMP int_slope_v,KH_v,vhtot,h_frac,h_avail_rsum, & +!$OMP vhD,h_avail,G_scale,Work_v) + do j = js-1,je ; do K=nz,2,-1 + if (find_work .and. .not.(use_EOS)) then + drdjA = 0.0 ; drdjB = 0.0 +! drdkL = G%g_prime(k) ; drdkR = G%g_prime(k) + drdkL = G%Rlay(k)-G%Rlay(k-1) ; drdkR = G%Rlay(k)-G%Rlay(k-1) + endif + if (use_EOS .and. ((k > nk_linear) .or. find_work)) then - do J=js-1,je - do i=is,ie - pres_v(i) = 0.5*(pres(i,j,K) + pres(i,j+1,K)) - T_v(i) = 0.25*((T(i,j,k) + T(i,j+1,k)) + (T(i,j,k-1) + T(i,j+1,k-1))) - S_v(i) = 0.25*((S(i,j,k) + S(i,j+1,k)) + (S(i,j,k-1) + S(i,j+1,k-1))) - enddo - call calculate_density_derivs(T_v, S_v, pres_v, drho_dT_v(:,J), & - drho_dS_v(:,J), is, ie-is+1, tv%eqn_of_state) + do i=is,ie + pres_v(i) = 0.5*(pres(i,j,K) + pres(i,j+1,K)) + T_v(i) = 0.25*((T(i,j,k) + T(i,j+1,k)) + (T(i,j,k-1) + T(i,j+1,k-1))) + S_v(i) = 0.25*((S(i,j,k) + S(i,j+1,k)) + (S(i,j,k-1) + S(i,j+1,k-1))) enddo + call calculate_density_derivs(T_v, S_v, pres_v, drho_dT_v, & + drho_dS_v, is, ie-is+1, tv%eqn_of_state) endif - do J=js-1,je ; do i=is,ie + do i=is,ie if (use_EOS .and. ((k > nk_linear) .or. find_work)) then ! Estimate the horizontal density gradients along layers. - drdjA = drho_dT_v(i,J) * (T(i,j+1,k-1)-T(i,j,k-1)) + & - drho_dS_v(i,J) * (S(i,j+1,k-1)-S(i,j,k-1)) - drdjB = drho_dT_v(i,J) * (T(i,j+1,k)-T(i,j,k)) + & - drho_dS_v(i,J) * (S(i,j+1,k)-S(i,j,k)) + drdjA = drho_dT_v(i) * (T(i,j+1,k-1)-T(i,j,k-1)) + & + drho_dS_v(i) * (S(i,j+1,k-1)-S(i,j,k-1)) + drdjB = drho_dT_v(i) * (T(i,j+1,k)-T(i,j,k)) + & + drho_dS_v(i) * (S(i,j+1,k)-S(i,j,k)) ! Estimate the vertical density gradients times the grid spacing. - drdkL = (drho_dT_v(i,J) * (T(i,j,k)-T(i,j,k-1)) + & - drho_dS_v(i,J) * (S(i,j,k)-S(i,j,k-1))) - drdkR = (drho_dT_v(i,J) * (T(i,j+1,k)-T(i,j+1,k-1)) + & - drho_dS_v(i,J) * (S(i,j+1,k)-S(i,j+1,k-1))) + drdkL = (drho_dT_v(i) * (T(i,j,k)-T(i,j,k-1)) + & + drho_dS_v(i) * (S(i,j,k)-S(i,j,k-1))) + drdkR = (drho_dT_v(i) * (T(i,j+1,k)-T(i,j+1,k-1)) + & + drho_dS_v(i) * (S(i,j+1,k)-S(i,j+1,k-1))) endif if (k > nk_linear) then @@ -760,7 +775,7 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & slope2_Ratio = 1.0e20 ! Force the use of the safe streamfunction. endif - if (present(int_slope_v)) then + if (present_int_slope_v) then Slope = (1.0 - int_slope_v(i,J,K)) * Slope + & int_slope_v(i,J,K) * ((e(i,j+1,K)-e(i,j,K)) * G%IdyCv(i,J)) slope2_Ratio = (1.0 - int_slope_v(i,J,K)) * slope2_Ratio @@ -847,54 +862,59 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, dt, G, MEKE, & (vhD(i,J,K) * drdjB) * 0.25 * & ((e(i,j,K) + e(i,j,K+1)) + (e(i,j+1,K) + e(i+1,j,K+1))) ) endif - enddo ; enddo - enddo ! k-loop + enddo + enddo ; enddo! j-loop ! In layer 1, enforce the boundary conditions that Sfn(z=0) = 0.0 if (.not.find_work .or. .not.(use_EOS)) then do j=js,je ; do I=is-1,ie ; uhD(I,j,1) = -uhtot(I,j) ; enddo ; enddo do J=js-1,je ; do i=is,ie ; vhD(i,J,1) = -vhtot(i,J) ; enddo ; enddo else - if (use_EOS) then ; do j=js,je - do I=is-1,ie - pres_u(I) = 0.5*(pres(i,j,1) + pres(i+1,j,1)) - T_u(I) = 0.5*(T(i,j,1) + T(i+1,j,1)) - S_u(I) = 0.5*(S(i,j,1) + S(i+1,j,1)) - enddo - call calculate_density_derivs(T_u, S_u, pres_u, drho_dT_u(:,j), & - drho_dS_u(:,j), (is-IsdB+1)-1, ie-is+2, tv%eqn_of_state) - enddo ; endif - do j=js,je ; do I=is-1,ie - uhD(I,j,1) = -uhtot(I,j) - - if (use_EOS) then - drdiB = drho_dT_u(I,j) * (T(i+1,j,1)-T(i,j,1)) + & - drho_dS_u(I,j) * (S(i+1,j,1)-S(i,j,1)) +!$OMP parallel do default(shared) private(pres_u,T_u,S_u,drho_dT_u,drho_dS_u,drdiB) + do j=js,je + if (use_EOS) then + do I=is-1,ie + pres_u(I) = 0.5*(pres(i,j,1) + pres(i+1,j,1)) + T_u(I) = 0.5*(T(i,j,1) + T(i+1,j,1)) + S_u(I) = 0.5*(S(i,j,1) + S(i+1,j,1)) + enddo + call calculate_density_derivs(T_u, S_u, pres_u, drho_dT_u, & + drho_dS_u, (is-IsdB+1)-1, ie-is+2, tv%eqn_of_state) endif - Work_u(I,j) = Work_u(I,j) + G_scale * ( (uhD(I,j,1) * drdiB) * 0.25 * & - ((e(i,j,1) + e(i,j,2)) + (e(i+1,j,1) + e(i+1,j,2))) ) + do I=is-1,ie + uhD(I,j,1) = -uhtot(I,j) - enddo ; enddo + if (use_EOS) then + drdiB = drho_dT_u(I) * (T(i+1,j,1)-T(i,j,1)) + & + drho_dS_u(I) * (S(i+1,j,1)-S(i,j,1)) + endif + Work_u(I,j) = Work_u(I,j) + G_scale * ( (uhD(I,j,1) * drdiB) * 0.25 * & + ((e(i,j,1) + e(i,j,2)) + (e(i+1,j,1) + e(i+1,j,2))) ) - if (use_EOS) then ; do J=js-1,je - do i=is,ie - pres_v(i) = 0.5*(pres(i,j,1) + pres(i,j+1,1)) - T_v(i) = 0.5*(T(i,j,1) + T(i,j+1,1)) - S_v(i) = 0.5*(S(i,j,1) + S(i,j+1,1)) - enddo - call calculate_density_derivs(T_v, S_v, pres_v, drho_dT_v(:,J), & - drho_dS_v(:,J), is, ie-is+1, tv%eqn_of_state) - enddo ; endif - do J=js-1,je ; do i=is,ie - vhD(i,J,1) = -vhtot(i,J) + enddo + enddo + do J=js-1,je if (use_EOS) then - drdjB = drho_dT_v(i,J) * (T(i,j+1,1)-T(i,j,1)) + & - drho_dS_v(i,J) * (S(i,j+1,1)-S(i,j,1)) + do i=is,ie + pres_v(i) = 0.5*(pres(i,j,1) + pres(i,j+1,1)) + T_v(i) = 0.5*(T(i,j,1) + T(i,j+1,1)) + S_v(i) = 0.5*(S(i,j,1) + S(i,j+1,1)) + enddo + call calculate_density_derivs(T_v, S_v, pres_v, drho_dT_v, & + drho_dS_v, is, ie-is+1, tv%eqn_of_state) endif - Work_v(i,J) = Work_v(i,J) - G_scale * ( (vhD(i,J,1) * drdjB) * 0.25 * & - ((e(i,j,1) + e(i,j,2)) + (e(i,j+1,1) + e(i,j+1,2))) ) - enddo ; enddo + do i=is,ie + vhD(i,J,1) = -vhtot(i,J) + + if (use_EOS) then + drdjB = drho_dT_v(i) * (T(i,j+1,1)-T(i,j,1)) + & + drho_dS_v(i) * (S(i,j+1,1)-S(i,j,1)) + endif + Work_v(i,J) = Work_v(i,J) - G_scale * ( (vhD(i,J,1) * drdjB) * 0.25 * & + ((e(i,j,1) + e(i,j,2)) + (e(i,j+1,1) + e(i,j+1,2))) ) + enddo + enddo endif if (find_work) then ; do j=js,je ; do i=is,ie From 93dd435d65b2df8e44d4d6e82cbab202388a0883 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 09:10:13 -0400 Subject: [PATCH 07/14] Add more OMP directives --- .../vertical/MOM_set_diffusivity.F90 | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/parameterizations/vertical/MOM_set_diffusivity.F90 b/src/parameterizations/vertical/MOM_set_diffusivity.F90 index b2d332ec06..26c2db59d4 100644 --- a/src/parameterizations/vertical/MOM_set_diffusivity.F90 +++ b/src/parameterizations/vertical/MOM_set_diffusivity.F90 @@ -522,19 +522,27 @@ subroutine set_diffusivity(u, v, h, u_h, v_h, tv, fluxes, optics, visc, dt, G, C ! the appropriate place to add a depth-dependent parameterization or ! another explicit parameterization of Kd. - if (CS%Bryan_Lewis_diffusivity) then ; do j=js,je ; do i=is,ie - Kd_sfc(i,j) = CS%Kd_Bryan_Lewis_surface - enddo ; enddo ; else ; do j=js,je ; do i=is,ie - Kd_sfc(i,j) = CS%Kd - enddo ; enddo ; endif + if (CS%Bryan_Lewis_diffusivity) then +!$OMP parallel do default(shared) + do j=js,je ; do i=is,ie + Kd_sfc(i,j) = CS%Kd_Bryan_Lewis_surface + enddo ; enddo + else +!$OMP parallel do default(shared) + do j=js,je ; do i=is,ie + Kd_sfc(i,j) = CS%Kd + enddo ; enddo + endif if (CS%Henyey_IGW_background) then I_x30 = 2.0 / invcosh(CS%N0_2Omega*2.0) ! This is evaluated at 30 deg. +!$OMP parallel do default(shared) private(abs_sin) do j=js,je ; do i=is,ie abs_sin = abs(sin(G%geoLatT(i,j)*deg_to_rad)) Kd_sfc(i,j) = max(CS%Kd_min, Kd_sfc(i,j) * & ((abs_sin * invcosh(CS%N0_2Omega/max(epsilon,abs_sin))) * I_x30) ) enddo ; enddo elseif (CS%Kd_tanh_lat_fn) then +!$OMP parallel do default(shared) do j=js,je ; do i=is,ie ! The transition latitude and latitude range are hard-scaled here, since ! this is not really intended for wide-spread use, but rather for @@ -728,12 +736,18 @@ subroutine set_diffusivity(u, v, h, u_h, v_h, tv, fluxes, optics, visc, dt, G, C endif if (CS%Kd_add > 0.0) then - if (present(Kd_int)) then ; do k=1,nz ; do j=js,je ; do i=is,ie - Kd_int(i,j,K) = Kd_int(i,j,K) + CS%Kd_add - Kd(i,j,k) = Kd(i,j,k) + CS%Kd_add - enddo ; enddo ; enddo ; else ; do k=1,nz ; do j=js,je ; do i=is,ie - Kd(i,j,k) = Kd(i,j,k) + CS%Kd_add - enddo ; enddo ; enddo ; endif + if (present(Kd_int)) then +!$OMP parallel do default(shared) + do k=1,nz ; do j=js,je ; do i=is,ie + Kd_int(i,j,K) = Kd_int(i,j,K) + CS%Kd_add + Kd(i,j,k) = Kd(i,j,k) + CS%Kd_add + enddo ; enddo ; enddo + else +!$OMP parallel do default(shared) + do k=1,nz ; do j=js,je ; do i=is,ie + Kd(i,j,k) = Kd(i,j,k) + CS%Kd_add + enddo ; enddo ; enddo + endif endif if (CS%user_change_diff) then From 3f6a0c38bd7b0f5ff4d94e100700c50fd4fb81a0 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 09:52:55 -0400 Subject: [PATCH 08/14] implement openmp --- .../vertical/MOM_diabatic_driver.F90 | 107 +++++++++++++----- 1 file changed, 79 insertions(+), 28 deletions(-) diff --git a/src/parameterizations/vertical/MOM_diabatic_driver.F90 b/src/parameterizations/vertical/MOM_diabatic_driver.F90 index 9d5c615198..b53b3c0c33 100644 --- a/src/parameterizations/vertical/MOM_diabatic_driver.F90 +++ b/src/parameterizations/vertical/MOM_diabatic_driver.F90 @@ -351,6 +351,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) if (CS%debugConservation) call MOM_state_stats('1st make_frazil', u, v, h, T, S, G) if ((CS%ML_mix_first > 0.0) .or. CS%use_geothermal) then +!$OMP parallel do default(shared) do k=1,nz ; do j=js,je ; do i=is,ie h_orig(i,j,k) = h(i,j,k) ; eaml(i,j,k) = 0.0 ; ebml(i,j,k) = 0.0 enddo ; enddo ; enddo @@ -368,7 +369,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) ! biological properties and layer thicknesses. if (associated(CS%optics)) & call set_opacity(CS%optics, fluxes, G, CS%opacity_CSp) - + if (CS%bulkmixedlayer) then if (CS%ML_mix_first > 0.0) then ! This subroutine (1) Cools the mixed layer. @@ -460,28 +461,56 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) ! Thus, KPP is the last contribution to Kd. ! Changes: Kd_int. Sets: KPP_NLTheat, KPP_NLTscalar if (associated(visc%Kd_turb) .and. CS%matchKPPwithoutKappaShear) then - Kd_salt(is:ie,js:je,:) = Kd_int(is:ie,js:je,:) - visc%Kd_turb(is:ie,js:je,:) ! Temporarily remove part due to Kappa-shear - Kd_heat(is:ie,js:je,:) = Kd_int(is:ie,js:je,:) - visc%Kd_turb(is:ie,js:je,:) ! Temporarily remove part due to Kappa-shear +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + Kd_salt(i,j,k) = Kd_int(i,j,k) - visc%Kd_turb(i,j,k) ! Temporarily remove part due to Kappa-shear + Kd_heat(i,j,k) = Kd_int(i,j,k) - visc%Kd_turb(i,j,k) ! Temporarily remove part due to Kappa-shear + enddo ; enddo ; enddo else - Kd_salt(is:ie,js:je,:) = Kd_int(is:ie,js:je,:) - Kd_heat(is:ie,js:je,:) = Kd_int(is:ie,js:je,:) +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + Kd_salt(i,j,k) = Kd_int(i,j,k) + Kd_heat(i,j,k) = Kd_int(i,j,k) + enddo ; enddo ; enddo + endif + if (associated(visc%Kd_extra_S)) then +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + Kd_salt(i,j,k) = Kd_salt(i,j,k) + visc%Kd_extra_S(i,j,k) + enddo ; enddo ; enddo + endif + if (associated(visc%Kd_extra_T)) then +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + Kd_heat(i,j,k) = Kd_heat(i,j,k) + visc%Kd_extra_T(i,j,k) + enddo ; enddo ; enddo endif - if (associated(visc%Kd_extra_S)) & - Kd_salt(is:ie,js:je,:) = Kd_salt(is:ie,js:je,:) + visc%Kd_extra_S(is:ie,js:je,:) - if (associated(visc%Kd_extra_T)) & - Kd_heat(is:ie,js:je,:) = Kd_heat(is:ie,js:je,:) + visc%Kd_extra_T(is:ie,js:je,:) call KPP_calculate(CS%KPP_CSp, G, h, tv%T, tv%S, u, v, tv%eqn_of_state, & fluxes%ustar, CS%buoyancyFlux, Kd_heat, Kd_salt, visc%Kv_turb, CS%KPP_NLTheat, CS%KPP_NLTscalar) if (.not. CS%KPPisPassive) then if (associated(visc%Kd_turb) .and. CS%matchKPPwithoutKappaShear) then - Kd_salt(is:ie,js:je,:) = ( Kd_salt(is:ie,js:je,:) + visc%Kd_turb(is:ie,js:je,:) ) ! Put back part due to Kappa-shear - Kd_heat(is:ie,js:je,:) = ( Kd_heat(is:ie,js:je,:) + visc%Kd_turb(is:ie,js:je,:) ) ! Put back part due to Kappa-shear +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + Kd_salt(i,j,k) = ( Kd_salt(i,j,k) + visc%Kd_turb(i,j,k) ) ! Put back part due to Kappa-shear + Kd_heat(i,j,k) = ( Kd_heat(i,j,k) + visc%Kd_turb(i,j,k) ) ! Put back part due to Kappa-shear + enddo ; enddo ; enddo + endif +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + Kd_int(i,j,k) = min( Kd_salt(i,j,k), Kd_heat(i,j,k) ) + enddo ; enddo ; enddo + if (associated(visc%Kd_extra_S)) then +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + visc%Kd_extra_S(i,j,k) = Kd_salt(i,j,k) - Kd_int(i,j,k) + enddo ; enddo ; enddo + endif + if (associated(visc%Kd_extra_T)) then +!$OMP parallel do default(shared) + do k=1,nz+1 ; do j=js,je ; do i=is,ie + visc%Kd_extra_T(i,j,k) = Kd_heat(i,j,k) - Kd_int(i,j,k) + enddo ; enddo ; enddo endif - Kd_int(is:ie,js:je,:) = min( Kd_salt(is:ie,js:je,:), Kd_heat(is:ie,js:je,:) ) - if (associated(visc%Kd_extra_S)) & - visc%Kd_extra_S(is:ie,js:je,:) = Kd_salt(is:ie,js:je,:) - Kd_int(is:ie,js:je,:) - if (associated(visc%Kd_extra_T)) & - visc%Kd_extra_T(is:ie,js:je,:) = Kd_heat(is:ie,js:je,:) - Kd_int(is:ie,js:je,:) endif ! not passive call cpu_clock_end(id_clock_kpp) if (showCallTree) call callTree_waypoint("done with KPP_calculate (diabatic)") @@ -531,6 +560,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) do j=js,je ; do i=is,ie ea(i,j,1) = 0. enddo ; enddo +!$OMP parallel do default(shared) private(hval) do k=2,nz ; do j=js,je ; do i=is,ie hval=1.0/(h_neglect + 0.5*(h(i,j,k-1) + h(i,j,k))) ea(i,j,k) = (G%m_to_H**2) * dt * hval * Kd_int(i,j,k) @@ -583,6 +613,8 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) ! enough iterations are permitted in Calculate_Entrainment, and ! even if too few iterations are allowed, it is still guarded ! against. In other words the checks are probably unnecessary. + +!$OMP parallel do default(shared) do j=js,je do i=is,ie hold(i,j,1) = h(i,j,1) @@ -618,6 +650,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) ! are lighter than the mixed layer have temperatures and salinities ! that correspond to their prescribed densities. if (CS%massless_match_targets) then +!$OMP parallel do default (shared) private(h_tr,b1,d1,c1,b_denom_1) do j=js,je do i=is,ie h_tr = hold(i,j,1) + h_neglect @@ -693,6 +726,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) if ((CS%ML_mix_first > 0.0) .or. CS%use_geothermal) then ! The mixed layer code has already been called, but there is some needed ! bookkeeping. +!$OMP parallel do default(shared) do k=1,nz ; do j=js,je ; do i=is,ie hold(i,j,k) = h_orig(i,j,k) ea(i,j,k) = ea(i,j,k) + eaml(i,j,k) @@ -769,6 +803,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) Tdif_flx(i,j,1) = 0.0 ; Tdif_flx(i,j,nz+1) = 0.0 Tadv_flx(i,j,1) = 0.0 ; Tadv_flx(i,j,nz+1) = 0.0 enddo ; enddo +!$OMP parallel do default(shared) do K=2,nz ; do j=js,je ; do i=is,ie Tdif_flx(i,j,K) = (Idt * 0.5*(ea(i,j,k) + eb(i,j,k-1))) * & (T(i,j,k-1) - T(i,j,k)) @@ -782,6 +817,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) Sdif_flx(i,j,1) = 0.0 ; Sdif_flx(i,j,nz+1) = 0.0 Sadv_flx(i,j,1) = 0.0 ; Sadv_flx(i,j,nz+1) = 0.0 enddo ; enddo +!$OMP parallel do default(shared) do K=2,nz ; do j=js,je ; do i=is,ie Sdif_flx(i,j,K) = (Idt * 0.5*(ea(i,j,k) + eb(i,j,k-1))) * & (S(i,j,k-1) - S(i,j,k)) @@ -793,7 +829,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) call cpu_clock_begin(id_clock_tracers) if (CS%mix_boundary_tracers) then Tr_ea_BBL = sqrt(dt*CS%Kd_BBL_tr) - +!$OMP parallel do default(shared) private(htot,in_boundary,add_ent) do j=js,je do i=is,ie ebtr(i,j,nz) = eb(i,j,nz) @@ -843,6 +879,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) do j=js,je ; do i=is,ie ebtr(i,j,nz) = eb(i,j,nz) ; eatr(i,j,1) = ea(i,j,1) enddo ; enddo +!$OMP parallel do default(shared) private(add_ent) do k=nz,2,-1 ; do j=js,je ; do i=is,ie if (visc%Kd_extra_S(i,j,k) > 0.0) then add_ent = ((dt * visc%Kd_extra_S(i,j,k)) * G%m_to_H**2) / & @@ -866,6 +903,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) call cpu_clock_begin(id_clock_sponge) if (CS%bulkmixedlayer .and. ASSOCIATED(tv%eqn_of_state)) then do i=is,ie ; p_ref_cv(i) = tv%P_Ref ; enddo +!$OMP parallel do default(shared) do j=js,je call calculate_density(T(:,j,1), S(:,j,1), p_ref_cv, Rcv_ml(:,j), & is, ie-is+1, tv%eqn_of_state) @@ -881,21 +919,26 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) endif endif +!$OMP parallel default(shared) ! Save the diapycnal mass fluxes as a diagnostic field. if (ASSOCIATED(CDp%diapyc_vel)) then - do K=2,nz ; do j=js,je ; do i=is,ie - CDp%diapyc_vel(i,j,K) = Idt * (G%H_to_m * (ea(i,j,k) - eb(i,j,k-1))) - enddo ; enddo ; enddo - do j=js,je ; do i=is,ie - CDp%diapyc_vel(i,j,1) = 0.0 - CDp%diapyc_vel(i,j,nz+1) = 0.0 - enddo ; enddo +!$OMP do + do j=js,je + do K=2,nz ; do i=is,ie + CDp%diapyc_vel(i,j,K) = Idt * (G%H_to_m * (ea(i,j,k) - eb(i,j,k-1))) + enddo ; enddo + do i=is,ie + CDp%diapyc_vel(i,j,1) = 0.0 + CDp%diapyc_vel(i,j,nz+1) = 0.0 + enddo + enddo endif ! For momentum, it is only the net flux that homogenizes within ! the mixed layer. Vertical viscosity that is proportional to the ! mixed layer turbulence is applied elsewhere. if (CS%bulkmixedlayer) then +!$OMP do do k=2,G%nkml ; do j=js,je ; do i=is,ie if (ea(i,j,k) >= eb(i,j,k-1)) then ea(i,j,k) = ea(i,j,k) - eb(i,j,k-1) @@ -909,6 +952,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) ! This initializes the halo regions of ea, eb, and hold to default values. +!$OMP do do k=1,nz do i=is-1,ie+1 hold(i,js-1,k) = G%Angstrom ; ea(i,js-1,k) = 0.0 ; eb(i,js-1,k) = 0.0 @@ -919,7 +963,8 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) hold(ie+1,j,k) = G%Angstrom ; ea(ie+1,j,k) = 0.0 ; eb(ie+1,j,k) = 0.0 enddo enddo - +!$OMP end parallel + call cpu_clock_begin(id_clock_pass) if (G%symmetric) then call pass_var(hold,G%Domain,complete=.false.) @@ -940,6 +985,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) call MOM_state_chksum("before u/v tridiag ", u, v, h, G) endif call cpu_clock_begin(id_clock_tridiag) +!$OMP parallel do default(shared) private(hval,b1,d1,c1,eaval) do j=js,je do I=Isq,Ieq if (ASSOCIATED(ADp%du_dt_dia)) ADp%du_dt_dia(I,j,1) = u(I,j,1) @@ -971,6 +1017,7 @@ subroutine diabatic(u, v, h, tv, fluxes, visc, ADp, CDp, dt, G, CS) if (CS%debug) then call MOM_state_chksum("aft 1st loop tridiag ", u, v, h, G) endif +!$OMP parallel do default(shared) private(hval,b1,d1,c1,eaval) do J=Jsq,Jeq do i=is,ie if (ASSOCIATED(ADp%dv_dt_dia)) ADp%dv_dt_dia(i,J,1) = v(i,J,1) @@ -1112,7 +1159,8 @@ subroutine make_frazil(h, tv, G, CS) if (.not.CS%pressure_dependent_frazil) then do k=1,nz ; do i=is,ie ; pressure(i,k) = 0.0 ; enddo ; enddo endif - +!$OMP parallel do default(shared) firstprivate(pressure) & +!$OMP private(fraz_col,T_fr_set,T_freeze,hc) do j=js,je do i=is,ie ; fraz_col(:) = 0.0 ; enddo @@ -1239,6 +1287,8 @@ subroutine differential_diffuse_T_S(h, tv, visc, dt, G) T => tv%T ; S => tv%S Kd_T => visc%Kd_extra_T ; Kd_S => visc%Kd_extra_S +!$OMP parallel do default(shared) private(I_h_int,mix_T,mix_S,h_tr,b1_T,b1_S, & +!$OMP d1_T,d1_S,c1_T,c1_S,b_denom_T,b_denom_S) do j=js,je do i=is,ie I_h_int = 1.0 / (0.5 * (h(i,j,1) + h(i,j,2)) + h_neglect) @@ -1320,7 +1370,8 @@ subroutine adjust_salt(h, tv, G, CS) salt_add_col(:,:) = 0.0 - do k=nz,1,-1 ; do j=js,je ; do i=is,ie + + do j=js,je ; do k=nz,1,-1 ; do i=is,ie if ((G%mask2dT(i,j) > 0.0) .and. & ((tv%S(i,j,k) < S_min) .or. (salt_add_col(i,j) > 0.0))) then mc = G%H_to_kg_m2 * h(i,j,k) @@ -1752,7 +1803,7 @@ subroutine find_uv_at_h(u, v, h, u_h, v_h, G, ea, eb) if (present(ea) .neqv. present(eb)) call MOM_error(FATAL, & "find_uv_at_h: Either both ea and eb or neither one must be present "// & "in call to find_uv_at_h.") - +!$OMP parallel do default(shared) private(s,Idenom,a_w,a_e,a_s,a_n,b_denom_1,b1,d1,c1) do j=js,je do i=is,ie s = G%areaCu(i-1,j)+G%areaCu(i,j) From 662d47ea5766bf399f05567a207b9a5cd3fca8fa Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 09:54:58 -0400 Subject: [PATCH 09/14] implement openmp --- .../vertical/MOM_opacity.F90 | 156 ++++++++++-------- 1 file changed, 88 insertions(+), 68 deletions(-) diff --git a/src/parameterizations/vertical/MOM_opacity.F90 b/src/parameterizations/vertical/MOM_opacity.F90 index 7b7df16acd..d7d6d871ef 100644 --- a/src/parameterizations/vertical/MOM_opacity.F90 +++ b/src/parameterizations/vertical/MOM_opacity.F90 @@ -152,15 +152,17 @@ subroutine set_opacity(optics, fluxes, G, CS) ! Make sure there is no division by 0. inv_sw_pen_scale = 1.0 / max(CS%pen_sw_scale, 0.1*G%Angstrom_z, G%H_to_m*G%H_subroundoff) - +!$OMP parallel do default(shared) do k=1,nz ; do j=js,je ; do i=is,ie ; do n=1,optics%nbands optics%opacity_band(n,i,j,k) = inv_sw_pen_scale enddo ; enddo ; enddo ; enddo if (.not.associated(fluxes%sw) .or. (CS%pen_SW_scale <= 0.0)) then +!$OMP parallel do default(shared) do j=js,je ; do i=is,ie ; do n=1,optics%nbands optics%sw_pen_band(n,i,j) = 0.0 enddo ; enddo ; enddo else +!$OMP parallel do default(shared) do j=js,je ; do i=is,ie ; do n=1,optics%nbands optics%sw_pen_band(n,i,j) = CS%pen_SW_frac * Inv_nbands * fluxes%sw(i,j) enddo ; enddo ; enddo @@ -169,6 +171,7 @@ subroutine set_opacity(optics, fluxes, G, CS) if (query_averaging_enabled(CS%diag)) then if (CS%id_sw_pen > 0) then +!$OMP parallel do default(shared) do j=js,je ; do i=is,ie Pen_SW_tot(i,j) = 0.0 do n=1,optics%nbands @@ -179,6 +182,7 @@ subroutine set_opacity(optics, fluxes, G, CS) endif if (CS%id_sw_vis_pen > 0) then if (CS%opacity_scheme == MANIZZA_05) then +!$OMP parallel do default(shared) do j=js,je ; do i=is,ie Pen_SW_tot(i,j) = 0.0 do n=1,min(optics%nbands,2) @@ -186,6 +190,7 @@ subroutine set_opacity(optics, fluxes, G, CS) enddo enddo ; enddo else +!$OMP parallel do default(shared) do j=js,je ; do i=is,ie Pen_SW_tot(i,j) = 0.0 do n=1,optics%nbands @@ -196,6 +201,7 @@ subroutine set_opacity(optics, fluxes, G, CS) call post_data(CS%id_sw_vis_pen, Pen_SW_tot, CS%diag) endif do n=1,optics%nbands ; if (CS%id_opacity(n) > 0) then +!$OMP parallel do default(shared) do k=1,nz ; do j=js,je ; do i=is,ie tmp(i,j,k) = optics%opacity_band(n,i,j,k) enddo ; enddo ; enddo @@ -265,62 +271,96 @@ subroutine opacity_from_chl(optics, fluxes, G, CS, chl_in) multiband_nir_input = (associated(fluxes%sw_nir_dir) .and. & associated(fluxes%sw_nir_dif)) - do k=1,nz - - if (present(chl_in)) then - do j=js,je ; do i=is,ie ; chl_data(i,j) = chl_in(i,j,k) ; enddo ; enddo - do j=js,je ; do i=is,ie - if ((G%mask2dT(i,j) > 0.5) .and. (chl_data(i,j) < 0.0)) then + chl_data(:,:) = 0.0 + if(present(chl_in)) then + do j=js,je ; do i=is,ie ; chl_data(i,j) = chl_in(i,j,1) ; enddo ; enddo + do k=1,nz; do j=js,je ; do i=is,ie + if ((G%mask2dT(i,j) > 0.5) .and. (chl_in(i,j,k) < 0.0)) then write(mesg,'(" Negative chl_in of ",(1pe12.4)," found at i,j,k = ", & & 3(1x,i3), " lon/lat = ",(1pe12.4)," E ", (1pe12.4), " N.")') & chl_data(i,j), i, j, k, G%geoLonT(i,j), G%geoLatT(i,j) call MOM_error(FATAL,"MOM_opacity opacity_from_chl: "//trim(mesg)) - endif - enddo ; enddo - elseif (k==1) then - ! Only the 2-d surface chlorophyll can be read in from a file. The - ! same value is assumed for all layers. - call get_time(CS%Time,seconds,days) - do j=js,je ; do i=is,ie ; chl_data(i,j) = 0.0 ; enddo ; enddo - call time_interp_external(CS%sbc_chl, CS%Time, chl_data) + endif + enddo; enddo; enddo + else + ! Only the 2-d surface chlorophyll can be read in from a file. The + ! same value is assumed for all layers. + call get_time(CS%Time,seconds,days) + call time_interp_external(CS%sbc_chl, CS%Time, chl_data) + do j=js,je ; do i=is,ie + if ((G%mask2dT(i,j) > 0.5) .and. (chl_data(i,j) < 0.0)) then + write(mesg,'(" Time_interp negative chl of ",(1pe12.4)," at i,j = ",& + & 2(i3), "lon/lat = ",(1pe12.4)," E ", (1pe12.4), " N.")') & + chl_data(i,j), i, j, G%geoLonT(i,j), G%geoLatT(i,j) + call MOM_error(FATAL,"MOM_opacity opacity_from_chl: "//trim(mesg)) + endif + enddo ; enddo + endif + + if (CS%id_chl > 0) then + if(present(chl_in)) then + call post_data(CS%id_chl, chl_in(:,:,1), CS%diag) + else + call post_data(CS%id_chl, chl_data, CS%diag) + endif + endif + + select case (CS%opacity_scheme) + case (MANIZZA_05) +!$OMP parallel do default(shared) private(SW_vis_tot,SW_nir_tot) do j=js,je ; do i=is,ie - if ((G%mask2dT(i,j) > 0.5) .and. (chl_data(i,j) < 0.0)) then - write(mesg,'(" Time_interp negative chl of ",(1pe12.4)," at i,j = ",& - & 2(i3), "lon/lat = ",(1pe12.4)," E ", (1pe12.4), " N.")') & - chl_data(i,j), i, j, G%geoLonT(i,j), G%geoLatT(i,j) - call MOM_error(FATAL,"MOM_opacity opacity_from_chl: "//trim(mesg)) + SW_vis_tot = 0.0 ; SW_nir_tot = 0.0 + if (G%mask2dT(i,j) > 0.5) then + if (multiband_vis_input) then + SW_vis_tot = fluxes%sw_vis_dir(i,j) + fluxes%sw_vis_dif(i,j) + else ! Follow Manizza 05 in assuming that 42% of SW is visible. + SW_vis_tot = 0.42 * fluxes%sw(i,j) + endif + if (multiband_nir_input) then + SW_nir_tot = fluxes%sw_nir_dir(i,j) + fluxes%sw_nir_dif(i,j) + else + SW_nir_tot = fluxes%sw(i,j) - SW_vis_tot + endif endif + + ! Band 1 is Manizza blue. + optics%sw_pen_band(1,i,j) = CS%blue_frac*SW_vis_tot + ! Band 2 (if used) is Manizza red. + if (nbands > 1) & + optics%sw_pen_band(2,i,j) = (1.0-CS%blue_frac)*SW_vis_tot + ! All remaining bands are NIR, for lack of something better to do. + do n=3,nbands + optics%sw_pen_band(n,i,j) = Inv_nbands_nir * SW_nir_tot + enddo + enddo ; enddo + case (MOREL_88) +!$OMP parallel do default(shared) private(SW_pen_tot) + do j=js,je ; do i=is,ie + SW_pen_tot = 0.0 + if (G%mask2dT(i,j) > 0.5) then ; if (multiband_vis_input) then + SW_pen_tot = SW_pen_frac_morel(chl_data(i,j)) * & + (fluxes%sw_vis_dir(i,j) + fluxes%sw_vis_dif(i,j)) + else + SW_pen_tot = SW_pen_frac_morel(chl_data(i,j)) * & + 0.5*fluxes%sw(i,j) + endif ; endif + + do n=1,nbands + optics%sw_pen_band(n,i,j) = Inv_nbands*SW_pen_tot + enddo enddo ; enddo + case default + call MOM_error(FATAL, "opacity_from_chl: CS%opacity_scheme is not valid.") + end select + +!$OMP parallel do default(shared) firstprivate(chl_data) + do k=1,nz + if (present(chl_in)) then + do j=js,je ; do i=is,ie ; chl_data(i,j) = chl_in(i,j,k) ; enddo ; enddo endif select case (CS%opacity_scheme) case (MANIZZA_05) - if (k==1) then ; do j=js,je ; do i=is,ie - SW_vis_tot = 0.0 ; SW_nir_tot = 0.0 - if (G%mask2dT(i,j) > 0.5) then - if (multiband_vis_input) then - SW_vis_tot = fluxes%sw_vis_dir(i,j) + fluxes%sw_vis_dif(i,j) - else ! Follow Manizza 05 in assuming that 42% of SW is visible. - SW_vis_tot = 0.42 * fluxes%sw(i,j) - endif - if (multiband_nir_input) then - SW_nir_tot = fluxes%sw_nir_dir(i,j) + fluxes%sw_nir_dif(i,j) - else - SW_nir_tot = fluxes%sw(i,j) - SW_vis_tot - endif - endif - - ! Band 1 is Manizza blue. - optics%sw_pen_band(1,i,j) = CS%blue_frac*SW_vis_tot - ! Band 2 (if used) is Manizza red. - if (nbands > 1) & - optics%sw_pen_band(2,i,j) = (1.0-CS%blue_frac)*SW_vis_tot - ! All remaining bands are NIR, for lack of something better to do. - do n=3,nbands - optics%sw_pen_band(n,i,j) = Inv_nbands_nir * SW_nir_tot - enddo - enddo ; enddo ; endif - do j=js,je ; do i=is,ie if (G%mask2dT(i,j) <= 0.5) then do n=1,optics%nbands @@ -335,25 +375,7 @@ subroutine opacity_from_chl(optics, fluxes, G, CS, chl_in) do n=3,nbands ; optics%opacity_band(n,i,j,k) = 2.86 ; enddo endif enddo ; enddo - case (MOREL_88) - if (k==1) then ! Set up the surface fluxes. - do j=js,je ; do i=is,ie - SW_pen_tot = 0.0 - if (G%mask2dT(i,j) > 0.5) then ; if (multiband_vis_input) then - SW_pen_tot = SW_pen_frac_morel(chl_data(i,j)) * & - (fluxes%sw_vis_dir(i,j) + fluxes%sw_vis_dif(i,j)) - else - SW_pen_tot = SW_pen_frac_morel(chl_data(i,j)) * & - 0.5*fluxes%sw(i,j) - endif ; endif - - do n=1,optics%nbands - optics%sw_pen_band(n,i,j) = Inv_nbands*SW_pen_tot - enddo - enddo ; enddo - endif - do j=js,je ; do i=is,ie optics%opacity_band(1,i,j,k) = CS%opacity_land_value if (G%mask2dT(i,j) > 0.5) & @@ -367,11 +389,9 @@ subroutine opacity_from_chl(optics, fluxes, G, CS, chl_in) case default call MOM_error(FATAL, "opacity_from_chl: CS%opacity_scheme is not valid.") end select - - if ((k==1) .and. (CS%id_chl > 0)) then - call post_data(CS%id_chl, chl_data, CS%diag) - endif enddo + + end subroutine opacity_from_chl function opacity_morel(chl_data) From b68270e4d06f84d7cd6e2410850f170d2621eb92 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 09:55:48 -0400 Subject: [PATCH 10/14] Add more OMP directives --- src/parameterizations/lateral/MOM_thickness_diffuse.F90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parameterizations/lateral/MOM_thickness_diffuse.F90 b/src/parameterizations/lateral/MOM_thickness_diffuse.F90 index 513fe441d4..65cd525aca 100644 --- a/src/parameterizations/lateral/MOM_thickness_diffuse.F90 +++ b/src/parameterizations/lateral/MOM_thickness_diffuse.F90 @@ -1399,10 +1399,12 @@ subroutine vert_fill_TS(h, T_in, S_in, kappa, dt, T_f, S_f, G, halo_here) h0 = 1.0e-16*sqrt(kappa*dt)*G%m_to_H if (kap_dt_x2 <= 0.0) then +!$OMP parallel do default(shared) do k=1,nz ; do j=js,je ; do i=is,ie T_f(i,j,k) = T_in(i,j,k) ; S_f(i,j,k) = S_in(i,j,k) enddo ; enddo ; enddo else +!$OMP parallel do default(shared) private(ent,b1,d1,c1) do j=js,je do i=is,ie ent(i,2) = kap_dt_x2 / ((h(i,j,1)+h(i,j,2)) + h0) From b3a9a2fd0f8a72c0469f59d4325e3cb643ec8365 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 09:56:57 -0400 Subject: [PATCH 11/14] implement openmp --- .../lateral/MOM_mixed_layer_restrat.F90 | 99 ++++++++++--------- 1 file changed, 53 insertions(+), 46 deletions(-) diff --git a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 index d57b61a14b..e98b821245 100644 --- a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 +++ b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 @@ -175,7 +175,9 @@ subroutine mixedlayer_restrat(h, uhtr, vhtr, tv, fluxes, dt, G, CS) ! Fix this later for G%nkml >= 3. p0(:) = 0.0 - +!$OMP parallel default(shared) private(Rho0,h_vel,u_star,absf,mom_mixrate,timescale,uDml, & +!$OMP I2htot,z_topx2,hx2,a,vDml) +!$OMP do do j=js-1,je+1 do i=is-1,ie+1 htot(i,j) = 0.0 ; Rml_av(i,j) = 0.0 @@ -198,55 +200,58 @@ subroutine mixedlayer_restrat(h, uhtr, vhtr, tv, fluxes, dt, G, CS) ! 1. Mixing extends below the mixing layer to the mixed layer. Find it! ! 2. Add exponential tail to streamfunction? - do j=js,je ; do i=is,ie ; utimescale_diag(i,j) = 0.0 ; enddo ; enddo - do j=js,je ; do i=is,ie ; vtimescale_diag(i,j) = 0.0 ; enddo ; enddo - ! U - Component - do j=js,je ; do I=is-1,ie - h_vel = 0.5*(htot(i,j) + htot(i+1,j)) * G%H_to_m - - u_star = 0.5*(fluxes%ustar(i,j) + fluxes%ustar(i+1,j)) - absf = 0.5*(abs(G%CoriolisBu(I,J-1)) + abs(G%CoriolisBu(I,J))) - ! peak ML visc: u_star * 0.41 * (h_ml*u_star)/(absf*h_ml + 4.0*u_star) - ! momentum mixing rate: pi^2*visc/h_ml^2 - ! 0.41 is the von Karmen constant, 9.8696 = pi^2. - mom_mixrate = (0.41*9.8696)*u_star**2 / & - (absf*h_vel**2 + 4.0*(h_vel+dz_neglect)*u_star) - timescale = 0.0625 * (absf + 2.0*mom_mixrate) / (absf**2 + mom_mixrate**2) - - timescale = timescale * CS%ml_restrat_coef +!$OMP do + do j=js,je + do i=is,ie ; utimescale_diag(i,j) = 0.0 ; enddo + do i=is,ie ; vtimescale_diag(i,j) = 0.0 ; enddo + do I=is-1,ie + h_vel = 0.5*(htot(i,j) + htot(i+1,j)) * G%H_to_m + + u_star = 0.5*(fluxes%ustar(i,j) + fluxes%ustar(i+1,j)) + absf = 0.5*(abs(G%CoriolisBu(I,J-1)) + abs(G%CoriolisBu(I,J))) + ! peak ML visc: u_star * 0.41 * (h_ml*u_star)/(absf*h_ml + 4.0*u_star) + ! momentum mixing rate: pi^2*visc/h_ml^2 + ! 0.41 is the von Karmen constant, 9.8696 = pi^2. + mom_mixrate = (0.41*9.8696)*u_star**2 / & + (absf*h_vel**2 + 4.0*(h_vel+dz_neglect)*u_star) + timescale = 0.0625 * (absf + 2.0*mom_mixrate) / (absf**2 + mom_mixrate**2) + + timescale = timescale * CS%ml_restrat_coef ! timescale = timescale*(2?)*(L_def/L_MLI)*min(EKE/MKE,1.0 + G%dyCv(i,j)**2/L_def**2)) - utimescale_diag(I,j) = timescale - - uDml(I) = timescale * G%mask2dCu(I,j)*G%dyCu(I,j)* & - G%IdxCu(I,j)*(Rml_av(i+1,j)-Rml_av(i,j)) * (h_vel**2 * G%m_to_H) - - if (uDml(i) == 0) then - do k=1,G%nkml ; uhml(I,j,k) = 0.0 ; enddo - else - I2htot = 1.0 / (htot(i,j) + htot(i+1,j) + h_neglect) - z_topx2 = 0.0 - ! a(k) relates the sublayer transport to uDml with a linear profile. - ! The sum of a through the mixed layers must be 0. - do k=1,G%nkml - hx2 = (h(i,j,k) + h(i+1,j,k) + h_neglect) - a(k) = (hx2 * I2htot) * (2.0 - 4.0*(z_topx2+0.5*hx2)*I2htot) - z_topx2 = z_topx2 + hx2 - if (a(k)*uDml(I) > 0.0) then - if (a(k)*uDml(I) > h_avail(i,j,k)) uDml(I) = h_avail(i,j,k) / a(k) - else - if (-a(k)*uDml(I) > h_avail(i+1,j,k)) uDml(I) = -h_avail(i+1,j,k)/a(k) - endif - enddo - do k=1,G%nkml - uhml(I,j,k) = a(k)*uDml(I) - uhtr(I,j,k) = uhtr(I,j,k) + uhml(I,j,k)*dt - enddo - endif - enddo ; enddo + utimescale_diag(I,j) = timescale + + uDml(I) = timescale * G%mask2dCu(I,j)*G%dyCu(I,j)* & + G%IdxCu(I,j)*(Rml_av(i+1,j)-Rml_av(i,j)) * (h_vel**2 * G%m_to_H) + + if (uDml(i) == 0) then + do k=1,G%nkml ; uhml(I,j,k) = 0.0 ; enddo + else + I2htot = 1.0 / (htot(i,j) + htot(i+1,j) + h_neglect) + z_topx2 = 0.0 + ! a(k) relates the sublayer transport to uDml with a linear profile. + ! The sum of a through the mixed layers must be 0. + do k=1,G%nkml + hx2 = (h(i,j,k) + h(i+1,j,k) + h_neglect) + a(k) = (hx2 * I2htot) * (2.0 - 4.0*(z_topx2+0.5*hx2)*I2htot) + z_topx2 = z_topx2 + hx2 + if (a(k)*uDml(I) > 0.0) then + if (a(k)*uDml(I) > h_avail(i,j,k)) uDml(I) = h_avail(i,j,k) / a(k) + else + if (-a(k)*uDml(I) > h_avail(i+1,j,k)) uDml(I) = -h_avail(i+1,j,k)/a(k) + endif + enddo + do k=1,G%nkml + uhml(I,j,k) = a(k)*uDml(I) + uhtr(I,j,k) = uhtr(I,j,k) + uhml(I,j,k)*dt + enddo + endif + enddo + enddo ! V- component +!$OMP do do J=js-1,je ; do i=is,ie h_vel = 0.5*(htot(i,j) + htot(i,j+1)) * G%H_to_m @@ -290,10 +295,12 @@ subroutine mixedlayer_restrat(h, uhtr, vhtr, tv, fluxes, dt, G, CS) endif enddo ; enddo - do k=1,G%nkml ; do j=js,je ; do i=is,ie +!$OMP do + do j=js,je ; do k=1,G%nkml ; do i=is,ie h(i,j,k) = h(i,j,k) - dt*G%IareaT(i,j) * & ((uhml(I,j,k) - uhml(I-1,j,k)) + (vhml(i,J,k) - vhml(i,J-1,k))) enddo ; enddo ; enddo +!$OMP end parallel ! Offer fields for averaging. if (query_averaging_enabled(CS%diag) .and. & From dd7a7996bc5fd17407702ee4f7c9cbc24c8ef924 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 10:01:07 -0400 Subject: [PATCH 12/14] implement openmp --- .../lateral/MOM_lateral_mixing_coeffs.F90 | 63 ++++++++++++------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90 b/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90 index e823a23c05..5bf68e7db4 100644 --- a/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90 +++ b/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90 @@ -271,6 +271,8 @@ subroutine calc_slope_function_(h, tv, G, CS, e) real :: one_meter ! One meter in thickness units of m or kg m-2. integer :: is, ie, js, je, nz integer :: i, j, k, kb_max + real :: SN_u_local(SZIB_(G), SZJ_(G),SZK_(G)) + real :: SN_v_local(SZI_(G), SZJB_(G),SZK_(G)) if (.not. ASSOCIATED(CS)) call MOM_error(FATAL, "calc_slope_function:"// & "Module must be initialized before it is used.") @@ -286,6 +288,8 @@ subroutine calc_slope_function_(h, tv, G, CS, e) h_neglect = G%H_subroundoff H_cutoff = real(2*nz) * (G%Angstrom + h_neglect) +!$OMP parallel default(shared) private(E_x,E_y,S2,Hdn,Hup,H_geom,N2) +!$OMP do do j=js-1,je+1 ; do i=is-1,ie+1 CS%SN_u(i,j) = 0.0 CS%SN_v(i,j) = 0.0 @@ -296,14 +300,16 @@ subroutine calc_slope_function_(h, tv, G, CS, e) ! and midlatitude deformation radii, using calc_resoln_function as a template. ! Set the length scale at u-points. +!$OMP do do j=js,je ; do I=is-1,ie CS%L2u(I,j) = CS%Visbeck_L_scale**2 enddo ; enddo ! Set length scale at v-points +!$OMP do do J=js-1,je ; do i=is,ie CS%L2v(i,J) = CS%Visbeck_L_scale**2 enddo ; enddo - +!$OMP do do k=nz,CS%VarMix_Ktop,-1 ! Calculate the interface slopes E_x and E_y and u- and v- points respectively @@ -328,7 +334,7 @@ subroutine calc_slope_function_(h, tv, G, CS, e) N2 = G%g_prime(k) / (G%H_to_m * max(Hdn,Hup,one_meter)) if (min(h(i,j,k-1), h(i+1,j,k-1), h(i,j,k), h(i+1,j,k)) < H_cutoff) & S2 = 0.0 - CS%SN_u(I,j) = CS%SN_u(I,j) + (H_geom * G%H_to_m) * S2 * N2 + SN_u_local(I,j,k) = (H_geom * G%H_to_m) * S2 * N2 enddo ; enddo do J=js-1,je ; do i=is,ie S2 = ( E_y(i,J)**2 + 0.25*( & @@ -339,30 +345,43 @@ subroutine calc_slope_function_(h, tv, G, CS, e) N2 = G%g_prime(k) / (G%H_to_m * max(Hdn,Hup,one_meter)) if (min(h(i,j,k-1), h(i,j+1,k-1), h(i,j,k), h(i,j+1,k)) < H_cutoff) & S2 = 0.0 - CS%SN_v(i,J) = CS%SN_v(i,J) + (H_geom * G%H_to_m) * S2 * N2 + SN_v_local(i,J,k) = (H_geom * G%H_to_m) * S2 * N2 enddo ; enddo enddo ! k +!$OMP do + do j = js,je; + do k=nz,CS%VarMix_Ktop,-1 ; do I=is-1,ie + CS%SN_u(I,j) = CS%SN_u(I,j) + SN_u_local(I,j,k) + enddo ; enddo + ! SN above contains S^2*N^2*H, convert to vertical average of S*N + do I=is-1,ie + !SN_u(I,j) = sqrt( SN_u(I,j) / ( max(G%bathyT(I,j), G%bathyT(I+1,j)) + G%Angstrom ) ) + !The code below behaves better than the line above. Not sure why? AJA + if ( min(G%bathyT(I,j), G%bathyT(I+1,j)) > H_cutoff ) then + CS%SN_u(I,j) = sqrt( CS%SN_u(I,j) / max(G%bathyT(I,j), G%bathyT(I+1,j)) ) + else + CS%SN_u(I,j) = 0.0 + endif + enddo + enddo +!$OMP do + do J=js-1,je + do k=nz,CS%VarMix_Ktop,-1 ; do I=is,ie + CS%SN_v(i,J) = CS%SN_v(i,J) + SN_v_local(i,J,k) + enddo ; enddo + do i=is,ie + !SN_v(i,J) = sqrt( SN_v(i,J) / ( max(G%bathyT(i,J), G%bathyT(i,J+1)) + G%Angstrom ) ) + !The code below behaves better than the line above. Not sure why? AJA + if ( min(G%bathyT(I,j), G%bathyT(I+1,j)) > H_cutoff ) then + CS%SN_v(i,J) = sqrt( CS%SN_v(i,J) / max(G%bathyT(i,J), G%bathyT(i,J+1)) ) + else + CS%SN_v(I,j) = 0.0 + endif + enddo + enddo - ! SN above contains S^2*N^2*H, convert to vertical average of S*N - do j=js,je ; do I=is-1,ie - !SN_u(I,j) = sqrt( SN_u(I,j) / ( max(G%bathyT(I,j), G%bathyT(I+1,j)) + G%Angstrom ) ) - !The code below behaves better than the line above. Not sure why? AJA - if ( min(G%bathyT(I,j), G%bathyT(I+1,j)) > H_cutoff ) then - CS%SN_u(I,j) = sqrt( CS%SN_u(I,j) / max(G%bathyT(I,j), G%bathyT(I+1,j)) ) - else - CS%SN_u(I,j) = 0.0 - endif - enddo ; enddo - do J=js-1,je ; do i=is,ie - !SN_v(i,J) = sqrt( SN_v(i,J) / ( max(G%bathyT(i,J), G%bathyT(i,J+1)) + G%Angstrom ) ) - !The code below behaves better than the line above. Not sure why? AJA - if ( min(G%bathyT(I,j), G%bathyT(I+1,j)) > H_cutoff ) then - CS%SN_v(i,J) = sqrt( CS%SN_v(i,J) / max(G%bathyT(i,J), G%bathyT(i,J+1)) ) - else - CS%SN_v(I,j) = 0.0 - endif - enddo ; enddo +!$OMP end parallel ! Offer diagnostic fields for averaging. if (query_averaging_enabled(CS%diag)) then From 5d742b8c9cda9695fe607e4569f2a8a1a5b7ec92 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 10:03:24 -0400 Subject: [PATCH 13/14] implement openmp --- src/diagnostics/MOM_diagnostics.F90 | 143 ++++++++++++++++------------ 1 file changed, 83 insertions(+), 60 deletions(-) diff --git a/src/diagnostics/MOM_diagnostics.F90 b/src/diagnostics/MOM_diagnostics.F90 index fd951c58cf..33f05b4430 100644 --- a/src/diagnostics/MOM_diagnostics.F90 +++ b/src/diagnostics/MOM_diagnostics.F90 @@ -141,7 +141,7 @@ subroutine calculate_diagnostic_fields(u, v, h, uh, vh, tv, ADp, CDp, dt, G, & type(cont_diag_ptrs), intent(in) :: CDp real, intent(in) :: dt type(ocean_grid_type), intent(inout) :: G - type(diagnostics_CS), pointer :: CS + type(diagnostics_CS), intent(inout) :: CS real, dimension(NIMEM_,NJMEM_), optional, intent(in) :: eta_bt ! Any diagnostic quantities that are not more naturally calculated ! in the various other subroutines are calculated here. @@ -188,7 +188,7 @@ subroutine calculate_diagnostic_fields(u, v, h, uh, vh, tv, ADp, CDp, dt, G, & ! one iteration which would break the follwoing one-line workaround! if (nkmb==0) nkmb = nz - if (.not.associated(CS)) call MOM_error(FATAL, & + if (loc(CS)==0) call MOM_error(FATAL, & "calculate_diagnostic_fields: Module must be initialized before it is used.") call calculate_derivs(dt, G, CS) @@ -222,6 +222,7 @@ subroutine calculate_diagnostic_fields(u, v, h, uh, vh, tv, ADp, CDp, dt, G, & if (associated(tv%eqn_of_state)) then pres(:) = tv%P_Ref +!$OMP parallel do default(shared) do k=1,nz ; do j=js,je+1 call calculate_density(tv%T(:,j,k),tv%S(:,j,k),pres, & Rcv(:,j,k),is,ie-is+2, tv%eqn_of_state) @@ -231,82 +232,103 @@ subroutine calculate_diagnostic_fields(u, v, h, uh, vh, tv, ADp, CDp, dt, G, & endif if (ASSOCIATED(CS%h_Rlay)) then - do k=1,nkmb ; do j=js,je; do i=is,ie - CS%h_Rlay(i,j,k) = 0.0 - enddo ; enddo ; enddo - do k=nkmb+1,nz ; do j=js,je ; do i=is,ie - CS%h_Rlay(i,j,k) = h(i,j,k) - enddo ; enddo ; enddo k_list = nz/2 - do j=js,je ; do k=1,nkmb ; do i=is,ie - call find_weights(G%Rlay, Rcv(i,j,k), k_list, nz, wt, wt_p) - CS%h_Rlay(i,j,k_list) = CS%h_Rlay(i,j,k_list) + h(i,j,k)*wt - CS%h_Rlay(i,j,k_list+1) = CS%h_Rlay(i,j,k_list+1) + h(i,j,k)*wt_p - enddo ; enddo ; enddo +!$OMP parallel do default(shared) private(wt,wt_p) firstprivate(k_list) + do j=js,je + do k=1,nkmb ; do i=is,ie + CS%h_Rlay(i,j,k) = 0.0 + enddo ; enddo + do k=nkmb+1,nz ; do i=is,ie + CS%h_Rlay(i,j,k) = h(i,j,k) + enddo ; enddo + do k=1,nkmb ; do i=is,ie + call find_weights(G%Rlay, Rcv(i,j,k), k_list, nz, wt, wt_p) + CS%h_Rlay(i,j,k_list) = CS%h_Rlay(i,j,k_list) + h(i,j,k)*wt + CS%h_Rlay(i,j,k_list+1) = CS%h_Rlay(i,j,k_list+1) + h(i,j,k)*wt_p + enddo ; enddo + enddo + if (CS%id_h_Rlay > 0) call post_data(CS%id_h_Rlay, CS%h_Rlay, CS%diag) endif if (ASSOCIATED(CS%uh_Rlay)) then - do k=1,nkmb ; do j=js,je; do I=Isq,Ieq - CS%uh_Rlay(I,j,k) = 0.0 - enddo ; enddo ; enddo - do k=nkmb+1,nz ; do j=js,je ; do I=Isq,Ieq - CS%uh_Rlay(I,j,k) = uh(I,j,k) - enddo ; enddo ; enddo k_list = nz/2 - do j=js,je ; do k=1,nkmb ; do I=Isq,Ieq - call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i+1,j,k)), k_list, nz, wt, wt_p) - CS%uh_Rlay(I,j,k_list) = CS%uh_Rlay(I,j,k_list) + uh(I,j,k)*wt - CS%uh_Rlay(I,j,k_list+1) = CS%uh_Rlay(I,j,k_list+1) + uh(I,j,k)*wt_p - enddo ; enddo ; enddo +!$OMP parallel do default(shared) private(wt,wt_p) firstprivate(k_list) + do j=js,je + do k=1,nkmb ; do I=Isq,Ieq + CS%uh_Rlay(I,j,k) = 0.0 + enddo ; enddo + do k=nkmb+1,nz ; do I=Isq,Ieq + CS%uh_Rlay(I,j,k) = uh(I,j,k) + enddo ; enddo + k_list = nz/2 + do k=1,nkmb ; do I=Isq,Ieq + call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i+1,j,k)), k_list, nz, wt, wt_p) + CS%uh_Rlay(I,j,k_list) = CS%uh_Rlay(I,j,k_list) + uh(I,j,k)*wt + CS%uh_Rlay(I,j,k_list+1) = CS%uh_Rlay(I,j,k_list+1) + uh(I,j,k)*wt_p + enddo ; enddo + enddo + if (CS%id_uh_Rlay > 0) call post_data(CS%id_uh_Rlay, CS%uh_Rlay, CS%diag) endif if (ASSOCIATED(CS%vh_Rlay)) then - do k=1,nkmb ; do J=Jsq,Jeq; do i=is,ie - CS%vh_Rlay(i,J,k) = 0.0 - enddo ; enddo ; enddo - do k=nkmb+1,nz ; do J=Jsq,Jeq ; do i=is,ie - CS%vh_Rlay(i,J,k) = vh(i,J,k) - enddo ; enddo ; enddo k_list = nz/2 - do J=Jsq,Jeq ; do k=1,nkmb ; do i=is,ie - call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i,j+1,k)), k_list, nz, wt, wt_p) - CS%vh_Rlay(i,J,k_list) = CS%vh_Rlay(i,J,k_list) + vh(i,J,k)*wt - CS%vh_Rlay(i,J,k_list+1) = CS%vh_Rlay(i,J,k_list+1) + vh(i,J,k)*wt_p - enddo ; enddo ; enddo +!$OMP parallel do default(shared) private(wt,wt_p) firstprivate(k_list) + do J=Jsq,Jeq + do k=1,nkmb ; do i=is,ie + CS%vh_Rlay(i,J,k) = 0.0 + enddo ; enddo + do k=nkmb+1,nz ; do i=is,ie + CS%vh_Rlay(i,J,k) = vh(i,J,k) + enddo ; enddo + do k=1,nkmb ; do i=is,ie + call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i,j+1,k)), k_list, nz, wt, wt_p) + CS%vh_Rlay(i,J,k_list) = CS%vh_Rlay(i,J,k_list) + vh(i,J,k)*wt + CS%vh_Rlay(i,J,k_list+1) = CS%vh_Rlay(i,J,k_list+1) + vh(i,J,k)*wt_p + enddo ; enddo + enddo + if (CS%id_vh_Rlay > 0) call post_data(CS%id_vh_Rlay, CS%vh_Rlay, CS%diag) endif if (ASSOCIATED(CS%uhGM_Rlay) .and. ASSOCIATED(CDp%uhGM)) then - do k=1,nkmb ; do j=js,je; do I=Isq,Ieq - CS%uhGM_Rlay(I,j,k) = 0.0 - enddo ; enddo ; enddo - do k=nkmb+1,nz ; do j=js,je ; do I=Isq,Ieq - CS%uhGM_Rlay(I,j,k) = CDp%uhGM(I,j,k) - enddo ; enddo ; enddo k_list = nz/2 - do j=js,je ; do k=1,nkmb ; do I=Isq,Ieq - call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i+1,j,k)), k_list, nz, wt, wt_p) - CS%uhGM_Rlay(I,j,k_list) = CS%uhGM_Rlay(I,j,k_list) + CDp%uhGM(I,j,k)*wt - CS%uhGM_Rlay(I,j,k_list+1) = CS%uhGM_Rlay(I,j,k_list+1) + CDp%uhGM(I,j,k)*wt_p - enddo ; enddo ; enddo +!$OMP parallel do default(shared) private(wt,wt_p) firstprivate(k_list) + do j=js,je + do k=1,nkmb ; do I=Isq,Ieq + CS%uhGM_Rlay(I,j,k) = 0.0 + enddo ; enddo + do k=nkmb+1,nz ; do I=Isq,Ieq + CS%uhGM_Rlay(I,j,k) = CDp%uhGM(I,j,k) + enddo ; enddo + do k=1,nkmb ; do I=Isq,Ieq + call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i+1,j,k)), k_list, nz, wt, wt_p) + CS%uhGM_Rlay(I,j,k_list) = CS%uhGM_Rlay(I,j,k_list) + CDp%uhGM(I,j,k)*wt + CS%uhGM_Rlay(I,j,k_list+1) = CS%uhGM_Rlay(I,j,k_list+1) + CDp%uhGM(I,j,k)*wt_p + enddo ; enddo + enddo + if (CS%id_uh_Rlay > 0) call post_data(CS%id_uhGM_Rlay, CS%uhGM_Rlay, CS%diag) endif if (ASSOCIATED(CS%vhGM_Rlay) .and. ASSOCIATED(CDp%vhGM)) then - do k=1,nkmb ; do J=Jsq,Jeq; do i=is,ie - CS%vhGM_Rlay(i,J,k) = 0.0 - enddo ; enddo ; enddo - do k=nkmb+1,nz ; do J=Jsq,Jeq ; do i=is,ie - CS%vhGM_Rlay(i,J,k) = CDp%vhGM(i,J,k) - enddo ; enddo ; enddo k_list = nz/2 - do J=Jsq,Jeq ; do k=1,nkmb ; do i=is,ie - call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i,j+1,k)), k_list, nz, wt, wt_p) - CS%vhGM_Rlay(i,J,k_list) = CS%vhGM_Rlay(i,J,k_list) + CDp%vhGM(i,J,k)*wt - CS%vhGM_Rlay(i,J,k_list+1) = CS%vhGM_Rlay(i,J,k_list+1) + CDp%vhGM(i,J,k)*wt_p - enddo ; enddo ; enddo +!$OMP parallel do default(shared) private(wt,wt_p) firstprivate(k_list) + do J=Jsq,Jeq + do k=1,nkmb ; do i=is,ie + CS%vhGM_Rlay(i,J,k) = 0.0 + enddo ; enddo + do k=nkmb+1,nz ; do i=is,ie + CS%vhGM_Rlay(i,J,k) = CDp%vhGM(i,J,k) + enddo ; enddo + do k=1,nkmb ; do i=is,ie + call find_weights(G%Rlay, 0.5*(Rcv(i,j,k)+Rcv(i,j+1,k)), k_list, nz, wt, wt_p) + CS%vhGM_Rlay(i,J,k_list) = CS%vhGM_Rlay(i,J,k_list) + CDp%vhGM(i,J,k)*wt + CS%vhGM_Rlay(i,J,k_list+1) = CS%vhGM_Rlay(i,J,k_list+1) + CDp%vhGM(i,J,k)*wt_p + enddo ; enddo + enddo + if (CS%id_vhGM_Rlay > 0) call post_data(CS%id_vhGM_Rlay, CS%vhGM_Rlay, CS%diag) endif endif @@ -316,6 +338,7 @@ subroutine calculate_diagnostic_fields(u, v, h, uh, vh, tv, ADp, CDp, dt, G, & call wave_speed(h, tv, G, CS%cg1, CS%wave_speed_CSp) if (CS%id_cg1>0) call post_data(CS%id_cg1, CS%cg1, CS%diag) if (CS%id_Rd1>0) then +!$OMP parallel do default(shared) private(f2_h,mag_beta) do j=js,je ; do i=is,ie ! Blend the equatorial deformation radius with the standard one. f2_h = absurdly_small_freq2 + 0.25 * & @@ -424,7 +447,7 @@ subroutine calculate_vertical_integrals(h, tv, G, CS) real, dimension(NIMEM_,NJMEM_,NKMEM_), intent(in) :: h type(thermo_var_ptrs), intent(in) :: tv type(ocean_grid_type), intent(inout) :: G - type(diagnostics_CS), pointer :: CS + type(diagnostics_CS), intent(inout) :: CS ! This subroutine calculates the vertical integrals of several tracers, along ! with the mass-weight of these tracers, the total column mass, and the ! carefully calculated column height. @@ -523,7 +546,7 @@ subroutine calculate_energy_diagnostics(u, v, h, uh, vh, ADp, CDp, G, CS) type(accel_diag_ptrs), intent(in) :: ADp type(cont_diag_ptrs), intent(in) :: CDp type(ocean_grid_type), intent(inout) :: G - type(diagnostics_CS), pointer :: CS + type(diagnostics_CS), intent(inout) :: CS ! This subroutine calculates a series of terms in the energy ! balance equations. @@ -739,7 +762,7 @@ end subroutine register_time_deriv subroutine calculate_derivs(dt, G, CS) real, intent(in) :: dt type(ocean_grid_type), intent(inout) :: G - type(diagnostics_CS), pointer :: CS + type(diagnostics_CS), intent(inout) :: CS ! This subroutine calculates all registered time derivatives. ! Arguments: dt - the time interval in s over which differences occur ! (in) G - The ocean's grid structure. From 2c162fc1a4fc2c980581a55eef5b49db3d7d0836 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 14 Apr 2014 10:04:13 -0400 Subject: [PATCH 14/14] openmp bug fix --- src/core/MOM_legacy_barotropic.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/MOM_legacy_barotropic.F90 b/src/core/MOM_legacy_barotropic.F90 index 54a1a5c66b..4f03a28498 100644 --- a/src/core/MOM_legacy_barotropic.F90 +++ b/src/core/MOM_legacy_barotropic.F90 @@ -3356,7 +3356,7 @@ subroutine legacy_bt_mass_source(h, eta, fluxes, set_cor, dt_therm, & is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = G%ke -!$OMP parallel do default(shared) private(i,j,k,eta_h) +!$OMP parallel do default(shared) private(eta_h,h_tot,limit_dt,d_eta) do j=js,je do i=is,ie ; h_tot(i) = h(i,j,1) ; enddo if (G%Boussinesq) then