astropy · aphearin · Jun 8, 2018 · Jun 6, 2018 · Jun 6, 2018 · Jun 6, 2018
diff --git a/.gitignore b/.gitignore
@@ -43,11 +43,12 @@ distribute-*.tar.gz
 # Other
 .*.swp
 *~
-temporary_testing_script.py 
+temporary_testing_script.py
 *.ipynb
 .ipynb_checkpoints/*
 */.ipynb_checkpoints/*
 */*/.ipynb_checkpoints/*
+.pytest_cache/*
 
 # Mac OSX
 .DS_Store
diff --git a/halotools/empirical_models/abunmatch/bin_free_cam.py b/halotools/empirical_models/abunmatch/bin_free_cam.py
@@ -3,12 +3,12 @@
 import numpy as np
 from ...utils import unsorting_indices
 from ...utils.conditional_percentile import _check_xyn_bounds, rank_order_function
-from .engines import cython_bin_free_cam_kernel
+from .engines import cython_bin_free_cam_kernel, get_value_at_rank
 from .tests.naive_python_cam import sample2_window_indices
 
 
 def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
-            assume_x_is_sorted=False, assume_x2_is_sorted=False):
+            assume_x_is_sorted=False, assume_x2_is_sorted=False, return_indexes=False):
     r"""
     Given a set of input points with primary property `x` and secondary property `y`,
     use conditional abundance matching to map new values `ynew` onto the input points
@@ -78,6 +78,9 @@ def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
     `~halotools.empirical_models.conditional_abunmatch_bin_based`.
 
     """
+    if (return_indexes and add_subgrid_noise):
+        raise ValueError("Can't add subgrid noise when returning indexes")
+
     x, y, nwin = _check_xyn_bounds(x, y, nwin)
     x2, y2, nwin = _check_xyn_bounds(x2, y2, nwin)
     nhalfwin = int(nwin/2)
@@ -102,27 +105,48 @@ def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
     i2_matched = np.searchsorted(x2_sorted, x_sorted).astype('i4')
 
     result = np.array(cython_bin_free_cam_kernel(
-        y_sorted, y2_sorted, i2_matched, nwin, int(add_subgrid_noise)))
+        y_sorted, y2_sorted, i2_matched, nwin, int(add_subgrid_noise), int(return_indexes)))
 
     #  Finish the leftmost points in pure python
     iw = 0
+    leftmost_window_ranks = rank_order_function(y_sorted[:nwin])
     for ix1 in range(0, nhalfwin):
         iy2_low, iy2_high = sample2_window_indices(ix1, x_sorted, x2_sorted, nwin)
-        leftmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
-        leftmost_window_ranks = rank_order_function(y_sorted[:nwin])
-        result[ix1] = leftmost_sorted_window_y2[leftmost_window_ranks[iw]]
+
+        if return_indexes:
+            leftmost_window_sorting_indexes = np.argsort(y2_sorted[iy2_low:iy2_high])
+            result[ix1] = iy2_low + leftmost_window_sorting_indexes[
+                    leftmost_window_ranks[iw]
+            ]
+        else:
+            leftmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
+            result[ix1] = get_value_at_rank(leftmost_sorted_window_y2, leftmost_window_ranks[iw], nwin, int(add_subgrid_noise))
+
         iw += 1
 
     #  Finish the rightmost points in pure python
     iw = nhalfwin + 1
+    rightmost_window_ranks = rank_order_function(y_sorted[-nwin:])
     for ix1 in range(npts1-nhalfwin, npts1):
         iy2_low, iy2_high = sample2_window_indices(ix1, x_sorted, x2_sorted, nwin)
-        rightmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
-        rightmost_window_ranks = rank_order_function(y_sorted[-nwin:])
-        result[ix1] = rightmost_sorted_window_y2[rightmost_window_ranks[iw]]
+
+        if return_indexes:
+            rightmost_window_sorting_indexes = np.argsort(y2_sorted[iy2_low:iy2_high])
+            result[ix1] = iy2_low + rightmost_window_sorting_indexes[
+                    rightmost_window_ranks[iw]
+            ]
+        else:
+            rightmost_sorted_window_y2 = np.sort(y2_sorted[iy2_low:iy2_high])
+            result[ix1] = get_value_at_rank(rightmost_sorted_window_y2, rightmost_window_ranks[iw], nwin, int(add_subgrid_noise))
         iw += 1
 
-    if assume_x_is_sorted:
+
+    if return_indexes:
+        # The result indexes point to the location in y2_sorted. Undo that if required
+        result = result if assume_x2_is_sorted else idx_x2_sorted[result]
+        # The result indexes are ordered like y_sorted. Undo that if required
+        result = result if assume_x_is_sorted else result[unsorting_indices(idx_x_sorted)]
         return result
     else:
-        return result[unsorting_indices(idx_x_sorted)]
+        # The result values are ordered like y_sorted. Undo that if required
+        return result if assume_x_is_sorted else result[unsorting_indices(idx_x_sorted)]
diff --git a/halotools/empirical_models/abunmatch/engines/__init__.py b/halotools/empirical_models/abunmatch/engines/__init__.py
@@ -1 +1 @@
-from .bin_free_cam_kernel import cython_bin_free_cam_kernel
+from .bin_free_cam_kernel import cython_bin_free_cam_kernel, get_value_at_rank
diff --git a/halotools/empirical_models/abunmatch/engines/bin_free_cam_kernel.pyx b/halotools/empirical_models/abunmatch/engines/bin_free_cam_kernel.pyx
@@ -6,7 +6,7 @@ import numpy as np
 cimport cython
 from ....utils import unsorting_indices
 
-__all__ = ('cython_bin_free_cam_kernel', )
+__all__ = ('cython_bin_free_cam_kernel', 'get_value_at_rank')
 
 
 cdef double random_uniform():
@@ -110,11 +110,35 @@ def setup_initial_indices(int iy, int nwin, int npts):
     return iy, init_iy_low, init_iy_high
 
 
+@cython.boundscheck(False)
+@cython.nonecheck(False)
+@cython.wraparound(False)
+cdef int _find_index(int[:] arr, int val):  # linear scan: position of the first element of arr equal to val
+    for i in range(len(arr)):
+        if arr[i] == val:
+            return i  # first match wins
+    return -1  # sentinel: val does not occur in arr
+
+
+@cython.boundscheck(False)
+@cython.nonecheck(False)
+@cython.wraparound(False)
+def get_value_at_rank(double[:] sorted_values, int rank1, int nwin, int add_subgrid_noise):  # entry of sorted_values at rank1, or a random blend of its two neighbours
+    if add_subgrid_noise == 0:
+        return sorted_values[rank1]  # no noise requested: return the entry at rank1 itself
+    else:
+        low_rank = max(rank1 - 1, 0)  # rank below, clamped at the first entry
+        high_rank = min(rank1 + 1, nwin - 1)  # NOTE(review): clamps against nwin, not len(sorted_values); bounds checks are off, so this assumes nwin entries - confirm
+        low_cdf = sorted_values[low_rank]
+        high_cdf = sorted_values[high_rank]
+        return low_cdf + (high_cdf-low_cdf)*random_uniform()  # blend of the two neighbouring values, weighted by a random_uniform() draw
+
+
 @cython.boundscheck(False)
 @cython.nonecheck(False)
 @cython.wraparound(False)
 def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int nwin,
-            int add_subgrid_noise=0):
+            int add_subgrid_noise, int return_indexes):
     """ Kernel underlying the bin-free implementation of conditional abundance matching.
     For the i^th element of y1, we define a window of length `nwin` surrounding the
     point y1[i], and another window surrounding y2[i2_match[i]]. We calculate the
@@ -159,7 +183,9 @@ def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int
     cdef int idx_in1, idx_out1, idx_in2, idx_out2
     cdef double value_in1, value_out1, value_in2, value_out2
 
-    cdef double[:] y1_new = np.zeros(npts1, dtype='f8') - 1
+    cdef int[:] y1_new_indexes = np.zeros(npts1, dtype='i4') - 1
+    cdef double[:] y1_new_values = np.zeros(npts1, dtype='f8') - 1
+
     cdef int rank1, rank2
 
     #  Set up initial window arrays for y1
@@ -201,58 +227,47 @@ def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int
     for iy1 in range(nhalfwin, npts1-nhalfwin):
 
         rank1 = correspondence_indx1[nhalfwin]
-        iy2_match = i2_match[iy1]
-
-        #  Stop updating the second window once we reach npts2-nwin/2
-        if iy2_match > iy2_max:
-            iy2_match = iy2_max
+        #  Where to center the second window (making sure we don't slide off the end)
+        iy2_match = min(i2_match[iy1], iy2_max)
 
-        if iy2 > iy2_max:
-            iy2 = iy2_max
-        else:
-            #  Continue to slide the window along the second array
-            #  until we find the matching point, updating the window with each iteration
-            while iy2 < iy2_match:
+        #  Continue to slide the window along the second array
+        #  until we find the matching point, updating the window with each iteration
+        while iy2 < iy2_match:
 
-                #  Find the value coming in and the value coming out
-                value_in2 = y2[iy2 + nhalfwin + 1]
-                idx_out2 = correspondence_indx2[nwin-1]
-                value_out2 = sorted_cdf_values2[idx_out2]
+            #  Find the value coming in and the value coming out
+            value_in2 = y2[iy2 + nhalfwin + 1]
+            idx_out2 = correspondence_indx2[nwin-1]
+            value_out2 = sorted_cdf_values2[idx_out2]
 
-                #  Find the position where we will insert the new point into the second window
-                idx_in2 = _bisect_left_kernel(sorted_cdf_values2, value_in2)
-                if value_in2 > value_out2:
-                    idx_in2 -= 1
+            #  Find the position where we will insert the new point into the second window
+            idx_in2 = _bisect_left_kernel(sorted_cdf_values2, value_in2)
+            if value_in2 > value_out2:
+                idx_in2 -= 1
 
-                #  Update the correspondence array
-                _insert_first_pop_last_kernel(&correspondence_indx2[0], idx_in2, nwin)
-                for j in range(1, nwin):
-                    idx2 = correspondence_indx2[j]
-                    correspondence_indx2[j] += _correspondence_indices_shift(
-                        idx_in2, idx_out2, idx2)
+            #  Update the correspondence array
+            _insert_first_pop_last_kernel(&correspondence_indx2[0], idx_in2, nwin)
+            for j in range(1, nwin):
+                idx2 = correspondence_indx2[j]
+                correspondence_indx2[j] += _correspondence_indices_shift(
+                    idx_in2, idx_out2, idx2)
 
-                #  Update the CDF window
-                _insert_pop_kernel(&sorted_cdf_values2[0], idx_in2, idx_out2, value_in2)
+            #  Update the CDF window
+            _insert_pop_kernel(&sorted_cdf_values2[0], idx_in2, idx_out2, value_in2)
 
-                iy2 += 1
+            iy2 += 1
 
         #  The array sorted_cdf_values2 is now centered on the correct point of y2
         #  We have already calculated the rank-order of the point iy1, rank1
         #  So we either directly map sorted_cdf_values2[rank1] to ynew,
         #  or alternatively we randomly draw a value between
         #  sorted_cdf_values2[rank1-1] and sorted_cdf_values2[rank1+1]
-        if add_subgrid_noise == 0:
-            y1_new[iy1] = sorted_cdf_values2[rank1]
+        if return_indexes == 1:
+            index = _find_index(correspondence_indx2, rank1)
+            if index == -1:
+                raise Exception("Index {} not found in correspondence_indx2".format(rank1))
+            y1_new_indexes[iy1] = iy2 + nhalfwin - index
         else:
-            low_rank = rank1 - 1
-            high_rank = rank1 + 1
-            if low_rank < 0:
-                low_rank = 0
-            elif high_rank >= nwin:
-                high_rank = nwin - 1
-            low_cdf = sorted_cdf_values2[low_rank]
-            high_cdf = sorted_cdf_values2[high_rank]
-            y1_new[iy1] = low_cdf + (high_cdf-low_cdf)*random_uniform()
+            y1_new_values[iy1] = get_value_at_rank(sorted_cdf_values2, rank1, nwin, add_subgrid_noise)
 
         #  Move on to the next value in y1
 
@@ -276,4 +291,6 @@ def cython_bin_free_cam_kernel(double[:] y1, double[:] y2, int[:] i2_match, int
         #  Update the CDF window
         _insert_pop_kernel(&sorted_cdf_values1[0], idx_in1, idx_out1, value_in1)
 
-    return y1_new
+    if return_indexes:
+        return y1_new_indexes
+    return y1_new_values
diff --git a/halotools/empirical_models/abunmatch/tests/test_bin_free_cam.py b/halotools/empirical_models/abunmatch/tests/test_bin_free_cam.py
@@ -2,13 +2,15 @@
 """
 import numpy as np
 from astropy.utils.misc import NumpyRNGContext
+import pytest
 from ..bin_free_cam import conditional_abunmatch
 from ....utils.conditional_percentile import cython_sliding_rank, rank_order_function
 from .naive_python_cam import pure_python_rank_matching
 from ....utils import unsorting_indices
 
 
 fixed_seed = 43
+fixed_seed2 = 44
 
 
 def test1():
@@ -384,11 +386,13 @@ def test_subgrid_noise1():
     result2 = conditional_abunmatch(x, y, x2, y2, nwin1, add_subgrid_noise=True)
     assert np.allclose(result, result2, atol=0.1)
     assert not np.allclose(result, result2, atol=0.02)
+    assert np.all(result - result2 != 0)
 
     nwin2 = 1001
     result = conditional_abunmatch(x, y, x2, y2, nwin2, add_subgrid_noise=False)
     result2 = conditional_abunmatch(x, y, x2, y2, nwin2, add_subgrid_noise=True)
     assert np.allclose(result, result2, atol=0.02)
+    assert np.all(result - result2 != 0)
 
 
 def test_initial_sorting1():
@@ -503,3 +507,40 @@ def test_initial_sorting4():
         assume_x_is_sorted=True, assume_x2_is_sorted=True,
         add_subgrid_noise=False)
     assert np.allclose(result, result4[unsorting_indices(idx_x_sorted)])
+
+def test_no_subgrid_noise_with_return_indexes():  # asking for indexes together with subgrid noise must raise ValueError
+    x, y = np.arange(5), np.arange(5)
+    x2, y2 = np.arange(10), np.arange(10)
+    nwin = 3
+    with pytest.raises(ValueError) as err:
+        conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True, return_indexes=True)
+    assert str(err.value) == "Can't add subgrid noise when returning indexes"  # pins the exact error message text
+
+
+def test_return_indexes():  # indexes from return_indexes=True must select from y2 the same values returned with return_indexes=False
+    n1, n2 = int(1e2), int(1e2)
+
+    with NumpyRNGContext(fixed_seed):
+        x = np.random.uniform(0, 10, n1)
+        y = np.random.uniform(0, 1, n1)
+
+    with NumpyRNGContext(fixed_seed2):  # second seed for the second sample
+        x2 = np.random.uniform(0, 10, n2)
+        y2 = np.random.uniform(-4, -3, n2)
+
+    nwin = 9
+    for sorted_x in [False, True]:  # cover all four combinations of the two assume_*_is_sorted flags
+        for sorted_x2 in [False, True]:
+            x_, y_, x2_, y2_ = x, y, x2, y2
+            if sorted_x:
+                x_, y_ = np.sort(x_), np.sort(y_)  # NOTE(review): x and y are sorted independently, so the original pairing is not kept
+            if sorted_x2:
+                x2_, y2_ = np.sort(x2_), np.sort(y2_)  # NOTE(review): same independent sort for the second sample
+
+
+            values = conditional_abunmatch(x_, y_, x2_, y2_, nwin, add_subgrid_noise=False,
+                    assume_x_is_sorted=sorted_x, assume_x2_is_sorted=sorted_x2, return_indexes=False)
+            indexes = conditional_abunmatch(x_, y_, x2_, y2_, nwin, add_subgrid_noise=False,
+                    assume_x_is_sorted=sorted_x, assume_x2_is_sorted=sorted_x2, return_indexes=True)
+
+            assert np.all(y2_[indexes] == values), "{}, {}".format(sorted_x, sorted_x2)  # exact equality; failure message names the flag combination