fixed bug in allow_extrapolation and added the _lr routine for static…

… correlations
metno · Sep 13, 2024 · c104db0 · c104db0
1 parent 1a3bb80
commit c104db0
Show file tree

Hide file tree

Showing 2 changed files with 312 additions and 15 deletions.
diff --git a/include/gridpp.h b/include/gridpp.h
@@ -350,25 +350,25 @@ namespace gridpp {
             const StructureFunction& structure,
             float var_ratios_or,
             float std_ratios_lr,
-            float weigth,
+            float weight,
             int max_points,
             bool allow_extrapolation=true); */
 
     /** Optimal interpolation for an ensemble gridded field (alternative version that works with R bindings)
       * Work in progress
       * @param bpoints Points of background field
-      * @param background 2D vector of (left) background values to update (M, E) M=num. grid points
-      * @param background 2D vector of (LEFT) background values (M, E) used to compute correlations
+      * @param background_l background 2D vector of (left) background values to update (M, E) M=num. grid points
+      * @param background_L background 2D vector of (LEFT) background values (M, E) used to compute correlations
       * @param obs_points Observation points
       * @param obs 2D vector of perturbed observations (S, E) S=num. obs points
-      * @param background 2D vector of (right) background values used to compute innovations (S, E)
-      * @param background 2D vector of (RIGHT) background values (S, E) used to compute correlations
+      * @param pbackground_r background 2D vector of (right) background values used to compute innovations (S, E)
+      * @param pbackground_R background 2D vector of (RIGHT) background values (S, E) used to compute correlations
       * @param which_structfun structure function to use (0=Barnes;1=MixA)
       * @param dh length scale for the horizontal structure function
       * @param dz length scale for the vertical structure function
       * @param dw minimum value of the correlation coefficient for laf structure function
-      * @param variance_ratio (ratio of observation to right background error variance)
-      * @param standard deviation ratio (ratio of left to right background error standard deviation)
+      * @param var_ratios_or variance_ratio (ratio of observation to right background error variance)
+      * @param std_ratios_lr standard deviation ratio (ratio of left to right background error standard deviation)
       * @param weight given to the analysis increment
       * @param max_points Maximum number of observations to use inside localization zone; Use 0 to disable
       * @param allow_extrapolation Allow OI to extrapolate increments outside increments at observations
@@ -388,7 +388,43 @@ namespace gridpp {
             float dw,
             float var_ratios_or,
             float std_ratios_lr,
-            float weigth,
+            float weight,
+            int max_points,
+            bool allow_extrapolation=true);
+
+    /** Optimal interpolation for an ensemble gridded field with static covariances
+      * Work in progress
+      * @param bpoints Points of background field
+      * @param background_l background 2D vector of (left) background values to update (M, E) M=num. grid points
+      * @param background_L background 2D vector of (LEFT) background values (M, E) used to compute correlations
+      * @param obs_points Observation points
+      * @param obs 2D vector of perturbed observations (S, E) S=num. obs points
+      * @param pbackground_r background 2D vector of (right) background values used to compute innovations (S, E)
+      * @param pbackground_R background 2D vector of (RIGHT) background values (S, E) used to compute correlations
+      * @param which_structfun structure function to use (0=Barnes;1=MixA)
+      * @param dh length scale for the horizontal structure function
+      * @param dz length scale for the vertical structure function
+      * @param dw minimum value of the correlation coefficient for laf structure function
+      * @param var_ratios_or variance_ratio (ratio of observation to right background error variance)
+      * @param std_ratios_lr standard deviation ratio (ratio of left to right background error standard deviation)
+      * @param weight given to the analysis increment
+      * @param max_points Maximum number of observations to use inside localization zone; Use 0 to disable
+      * @param allow_extrapolation Allow OI to extrapolate increments outside increments at observations
+      * @returns 2D vector of analised values (M, E)
+    */
+    vec2 R_optimal_interpolation_ensi_staticcorr_lr(const Points& bpoints,
+            const vec2& background_l,
+            const Points& obs_points,
+            const vec2& obs,
+            const vec2& pbackground_r,
+/*            const StructureFunction& structure, */
+            int which_structfun,
+            float dh,
+            float dz,
+            float dw,
+            float var_ratios_or,
+            float std_ratios_lr,
+            float weight,
             int max_points,
             bool allow_extrapolation=true);
 

diff --git a/src/api/oi_ensi_lr.cpp b/src/api/oi_ensi_lr.cpp
@@ -40,7 +40,7 @@ template void print_matrix< ::cxtype>(::cxtype matrix);
         const gridpp::StructureFunction& structure,
         float var_ratios_or, 
         float std_ratios_lr,
-        float weigth, 
+        float weight, 
         int max_points,
         bool allow_extrapolation) {
     double s_time = gridpp::clock();
@@ -108,7 +108,7 @@ template void print_matrix< ::cxtype>(::cxtype matrix);
             count++;
         }
     }
-    vec2 output1 = optimal_interpolation_ensi_lr(bpoints, background_l1, background_L1, points, pobs, pbackground_r, pbackground_R, structure, var_ratios_or, std_ratios_lr, weigth, max_points, allow_extrapolation);
+    vec2 output1 = optimal_interpolation_ensi_lr(bpoints, background_l1, background_L1, points, pobs, pbackground_r, pbackground_R, structure, var_ratios_or, std_ratios_lr, weight, max_points, allow_extrapolation);
     vec3 output = gridpp::init_vec3(nY, nX, nE);
     count = 0;
     for(int y = 0; y < nY; y++) {
@@ -121,6 +121,8 @@ template void print_matrix< ::cxtype>(::cxtype matrix);
     }
     return output;
 } */
+/* command to fix the gridpp.R file:
+for i in {1..50}; do sed -i "s/all(sapply(argv\[\[$i\]\] , is.integer) || sapply(argv\[\[$i\]\], is.numeric))/all(sapply(argv\[\[$i\]\] , is.integer) | sapply(argv\[\[$i\]\], is.numeric))/g" gridpp.R; done */
 vec2 gridpp::R_optimal_interpolation_ensi_lr(const gridpp::Points& bpoints,
         const vec2& background_l, 
         const vec2& background_L, 
@@ -135,7 +137,7 @@ vec2 gridpp::R_optimal_interpolation_ensi_lr(const gridpp::Points& bpoints,
         float dw,
         float var_ratios_or,
         float std_ratios_lr,
-        float weigth,
+        float weight,
         int max_points,
         bool allow_extrapolation) {
     if(max_points < 0)
@@ -411,13 +413,13 @@ vec2 gridpp::R_optimal_interpolation_ensi_lr(const gridpp::Points& bpoints,
         // lK(1, lS): Kalman gain
         mattype lK = lr_lr * arma::inv(lR_rr + lR_dd);
         // dx(1, nValidEns): analysis increment 
-        mattype dx = std_ratios_lr * weigth * (lK * lInnov);
+        mattype dx = std_ratios_lr * weight * (lK * lInnov);
         ///////////////////////////////
         // Anti-extrapolation filter //
         ///////////////////////////////
         if(!allow_extrapolation) {
-            vectype maxIncEns = arma::max(lInnov);
-            vectype minIncEns = arma::min(lInnov);
+            arma::rowvec maxIncEns = arma::max(lInnov);
+            arma::rowvec minIncEns = arma::min(lInnov);
 
             for(int e = 0; e < nValidEns; e++) {
                 float increment = dx[e];
@@ -457,4 +459,263 @@ vec2 gridpp::R_optimal_interpolation_ensi_lr(const gridpp::Points& bpoints,
 
     } // end loop over gridpoint 
     return output;
-}
+} // end of R_optimal_interpolation_ensi_lr
+
+/* command to fix the gridpp.R file:
+for i in {1..50}; do sed -i "s/all(sapply(argv\[\[$i\]\] , is.integer) || sapply(argv\[\[$i\]\], is.numeric))/all(sapply(argv\[\[$i\]\] , is.integer) | sapply(argv\[\[$i\]\], is.numeric))/g" gridpp.R; done */
+vec2 gridpp::R_optimal_interpolation_ensi_staticcorr_lr(const gridpp::Points& bpoints,
+        const vec2& background_l, 
+        const gridpp::Points& points,
+        const vec2& pobs,  
+        const vec2& pbackground_r, 
+/*        const gridpp::StructureFunction& structure, it creates problem with R bindings */
+        int which_structfun,
+        float dh,
+        float dz,
+        float dw,
+        float var_ratios_or,
+        float std_ratios_lr,
+        float weight,
+        int max_points,
+        bool allow_extrapolation) {
+    if(max_points < 0)
+        throw std::invalid_argument("max_points must be >= 0");
+    if(bpoints.get_coordinate_type() != points.get_coordinate_type()) {
+        throw std::invalid_argument("Both background and observations points must be of same coorindate type (lat/lon or x/y)");
+    }
+    if(background_l.size() != bpoints.size())
+        throw std::invalid_argument("Input left field is not the same size as the grid");
+    if(pobs.size() != points.size())
+        throw std::invalid_argument("Observations and points exception mismatch");
+    if(pbackground_r.size() != points.size())
+        throw std::invalid_argument("Background rigth and points size mismatch");
+
+    float hmax = 7 * dh;
+
+    int nS = points.size();
+    if(nS == 0)
+        return background_l;
+
+    int mY = -1;  // Write debug information for this station index
+    bool diagnose = false;
+
+    int nY = background_l.size();
+    int nEns = background_l[0].size();
+
+    // Prepare output matrix
+    float missing_value = -99999.999;
+    vec2 output = background_l;
+
+    vec blats = bpoints.get_lats();
+    vec blons = bpoints.get_lons();
+    vec belevs = bpoints.get_elevs();
+    vec blafs = bpoints.get_lafs();
+
+    vec plats = points.get_lats();
+    vec plons = points.get_lons();
+    vec pelevs = points.get_elevs();
+    vec plafs = points.get_lafs();
+
+    // Create all objects of type Point (to save time on creation later)
+    std::vector<Point> point_vec;
+    point_vec.reserve(nS);
+    for(int i = 0; i < nS; i++) {
+        point_vec.push_back(points.get_point(i));
+    }
+
+    // Calculate number of valid members
+    // NOTE: all backgrounds must be simultaneously valid
+    int nValidEns = 0;
+    ivec validEns;
+    for(int e = 0; e < nEns; e++) {
+        int numInvalid = 0;
+        for(int y = 0; y < nY; y++) {
+            float value_l = background_l[y][e];
+            if(!gridpp::is_valid(value_l))
+                numInvalid++;
+        }
+        for(int i = 0; i < nS; i++) {
+            float value_r = pbackground_r[i][e];
+            if(!gridpp::is_valid(value_r))
+                numInvalid++;
+        }
+        if(numInvalid == 0) {
+            validEns.push_back(e);
+            nValidEns++;
+        }
+    }
+    if(nValidEns == 0)
+        return background_l;
+
+    // This causes segmentation fault when building the gridpp pypi package
+    // 1) Tested removing num_condition_warning and num_real_part_warning from the parallel loop
+    //    but it doesnt seem to help
+    // #pragma omp parallel for
+    for(int y = 0; y < nY; y++) {
+        float lat = blats[y];
+        float lon = blons[y];
+        float elev = belevs[y];
+        float laf = blafs[y];
+        Point p1 = bpoints.get_point(y);
+        /* float localizationRadius = structure.localization_distance(p1); */
+        float localizationRadius = 0;
+        if(which_structfun == 0) {
+            BarnesStructure structure = BarnesStructure( dh, dz, dw, hmax);
+            localizationRadius = structure.localization_distance(p1);
+        }
+        else if(which_structfun == 1) {
+            MixAStructure structure = MixAStructure( dh, dz, dw, hmax);
+            localizationRadius = structure.localization_distance(p1);
+        }
+
+        // Create list of locations for this gridpoint
+        ivec lLocIndices0 = points.get_neighbours(lat, lon, localizationRadius);
+        if(lLocIndices0.size() == 0) {
+            // If we have too few observations though, then use the background
+            continue;
+        }
+        ivec lLocIndices;
+        lLocIndices.reserve(lLocIndices0.size());
+        std::vector<std::pair<float,int> > lRhos0;
+
+        // Calculate gridpoint to observation rhos
+        lRhos0.reserve(lLocIndices0.size());
+        std::vector<Point> p2;
+        p2.reserve(lLocIndices0.size());
+        for(int i = 0; i < lLocIndices0.size(); i++) {
+            int index = lLocIndices0[i];
+            p2.push_back(point_vec[index]);
+        }
+        vec rhos(lLocIndices0.size());
+        if(which_structfun == 0) {
+            BarnesStructure structure = BarnesStructure( dh, dz, dw, hmax);
+            rhos = structure.corr_background(p1, p2);
+        }
+        else if(which_structfun == 1) {
+            MixAStructure structure = MixAStructure( dh, dz, dw, hmax);
+            rhos = structure.corr_background(p1, p2);
+        } 
+        for(int i = 0; i < lLocIndices0.size(); i++) {
+            int index = lLocIndices0[i];
+            if(gridpp::is_valid(pobs[index][0])) {
+                if(rhos[i] > 0) {
+                    lRhos0.push_back(std::pair<float,int>(rhos[i], i));
+                }
+            }
+        }
+
+        // Make sure we don't use too many observations
+        arma::vec lRhos;
+        if(max_points > 0 && lRhos0.size() > max_points) {
+            // If sorting is enabled and we have too many locations, then only keep the best ones based on rho.
+            // Otherwise, just use the last locations added
+            lRhos = arma::vec(max_points);
+            std::sort(lRhos0.begin(), lRhos0.end(), ::sort_pair_first<float,int>());
+            for(int i = 0; i < max_points; i++) {
+                // The best values start at the end of the array
+                int index = lRhos0[lRhos0.size() - 1 - i].second;
+                lLocIndices.push_back(lLocIndices0[index]);
+                lRhos(i) = lRhos0[lRhos0.size() - 1 - i].first;
+            }
+        }
+        else {
+            lRhos = arma::vec(lRhos0.size());
+            for(int i = 0; i < lRhos0.size(); i++) {
+                int index = lRhos0[i].second;
+                lLocIndices.push_back(lLocIndices0[index]);
+                lRhos(i) = lRhos0[i].first;
+            }
+        }
+
+        int lS = lLocIndices.size();
+        if(lS == 0) {
+            // If we have too few observations though, then use the background
+            continue;
+        }
+
+        vectype lElevs(lS);
+        vectype lLafs(lS);
+        for(int i = 0; i < lLocIndices.size(); i++) {
+            int index = lLocIndices[i];
+            lElevs[i] = pelevs[index];
+            lLafs[i] = plafs[index];
+        }
+
+        // Innovation: Observation - Background_r
+        mattype lInnov(lS, nValidEns, arma::fill::zeros);
+        // lR_dd: Observation error correlation matrix
+        mattype lR_dd(lS, lS, arma::fill::zeros);
+        // lCorr1D: background correlations between yth gridpoint and observations
+        mattype lCorr1D(1, lS, arma::fill::zeros); 
+        // lCorr2D: background correlations among observations
+        mattype lCorr2D(lS, lS, arma::fill::zeros); 
+        for(int i = 0; i < lS; i++) {
+            // lR_dd: diagonal observation error correlation matrix
+            lR_dd(i, i) = var_ratios_or;
+            // lCorr1D between yth gridpoint and ith observation computed before
+            lCorr1D(0, i) = lRhos(i);
+            // compute lInnov
+            int index = lLocIndices[i];
+            for(int e = 0; e < nValidEns; e++) {
+                int ei = validEns[e];
+                lInnov(i, ei) = pobs[index][ei] - pbackground_r[index][ei];
+            }
+            // compute lCorr2D
+            Point p1 = point_vec[index];
+            std::vector<Point> p2(lS, Point(0, 0));
+            for(int j = 0; j < lS; j++) {
+                int index_j = lLocIndices[j];
+                p2[j] = point_vec[index_j];
+            }
+            vec corr(lS);
+            if(which_structfun == 0) {
+                BarnesStructure structure = BarnesStructure( dh, dz, dw, hmax);
+                corr = structure.corr_background(p1, p2);
+            }
+            else if(which_structfun == 1) {
+                MixAStructure structure = MixAStructure( dh, dz, dw, hmax);
+                corr = structure.corr_background(p1, p2);
+            } 
+            /* vec corr = structure.corr(p1, p2); */
+            for(int j = 0; j < lS; j++) 
+                lCorr2D(i, j) = corr[j];
+        } // end loop over closer observations
+        // lK(1, lS): Kalman gain
+        mattype lK = lCorr1D * arma::inv(lCorr2D + lR_dd);
+        // dx(1, nValidEns): analysis increment 
+        mattype dx = std_ratios_lr * weight * (lK * lInnov);
+        ///////////////////////////////
+        // Anti-extrapolation filter //
+        ///////////////////////////////
+        if(!allow_extrapolation) {
+            arma::rowvec maxIncEns = arma::max(lInnov);
+            arma::rowvec minIncEns = arma::min(lInnov);
+
+            for(int e = 0; e < nValidEns; e++) {
+                float increment = dx[e];
+                float maxInc = maxIncEns[e];
+                float minInc = minIncEns[e];
+                if(maxInc > 0 && increment > maxInc) {
+                   increment = maxInc;
+                }
+                else if(maxInc < 0 && increment > 0) {
+                   increment = maxInc;
+                }
+                else if(minInc < 0 && increment < minInc) {
+                   increment = minInc;
+                }
+                else if(minInc > 0 && increment < 0) {
+                   increment = minInc;
+                }
+                dx[e] = increment;
+            }
+        }
+
+        // Compute the analysis as the updated background_l
+        for(int e = 0; e < nValidEns; e++) {
+            int ei = validEns[e];
+            output[y][ei] = background_l[y][ei] + dx[e];
+        }
+    } // end loop over gridpoint 
+    return output;
+} // end of R_optimal_interpolation_ensi_staticcorr_lr