From 42341fb9e109ddf77e949c8453de2a30c9e4e71f Mon Sep 17 00:00:00 2001 From: George Dittmar Date: Wed, 8 Jul 2015 23:28:26 -0700 Subject: [PATCH] refactoring arg max check to better handle zero values --- .../apache/spark/mllib/linalg/Vectors.scala | 30 +++---------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 2c8891cef93ab..e4ba9a243737d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -724,7 +724,6 @@ class SparseVector( if (size == 0) { -1 } else { - var maxIdx = indices(0) var maxValue = values(0) @@ -735,33 +734,12 @@ class SparseVector( } } - // look for inactive values in case all active node values are negative - if (size != values.size && maxValue <= 0) { - val firstInactiveIdx = calcFirstInactiveIdx(0) - if (!(maxValue == 0 && firstInactiveIdx >= maxIdx)) { - maxIdx = firstInactiveIdx - } - maxValue = 0 + var k = 0 + while (k < indices.length && indices(k) == k && values(k) != 0.0) { + k += 1 } - maxIdx - } - } - /** - * Calculates the first instance of an inactive node in a sparse vector and returns the Idx - * of the element. - * @param idx starting index of computation - * @return index of first inactive node - */ - private[SparseVector] def calcFirstInactiveIdx(idx: Int): Int = { - if (idx < size) { - if (!indices.contains(idx)) { - idx - } else { - calcFirstInactiveIdx(idx + 1) - } - } else { - -1 + if (maxValue <= 0.0 || k >= maxIdx) k else maxIdx } } }