From 375817561de68b54be4b41ddbf6dbfc352d59360 Mon Sep 17 00:00:00 2001
From: Ken Takagiwa <ken@Kens-MacBook-Pro.local>
Date: Wed, 16 Jul 2014 11:17:02 -0700
Subject: [PATCH] add comment explaining why the PYSPARK_PYTHON hack is needed
 in spark-submit

---
 bin/spark-submit | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/bin/spark-submit b/bin/spark-submit
index ac275b7696d5c..fa022f707e572 100755
--- a/bin/spark-submit
+++ b/bin/spark-submit
@@ -37,6 +37,16 @@ done
 
 DEPLOY_MODE=${DEPLOY_MODE:-"client"}
 
+
+# This is a hack to make DStream.pyprint work.
+# It will be removed once pyprint is moved to PythonDStream.
+# The problem is that the print function lives in the (Scala) DStream.
+# Whenever Python code is executed, we call PythonDStream, which passes
+# pythonExec (the Python executable that Spark should run).
+# Since pyprint is located in DStream, Spark does not know which Python to use.
+# In that case, the Python path is taken from the PYSPARK_PYTHON environment variable.
+# The fix is in progress on the print branch in my repo.
+
 # Figure out which Python executable to use
 if [[ -z "$PYSPARK_PYTHON" ]]; then
   PYSPARK_PYTHON="python"