ray-project · ericl · Jun 24, 2022 · Jun 22, 2022 · Jun 24, 2022
@@ -75,11 +75,7 @@
 batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, XGBoostPredictor)
 
 # Bulk batch prediction.
-predicted_labels = (
-    batch_predictor.predict(test_dataset)
-    .map_batches(lambda df: (df > 0.5).astype(int), batch_format="pandas")
-    .to_pandas(limit=float("inf"))
-)
+predicted_probabilities = batch_predictor.predict(test_dataset)
 
 # Pipelined batch prediction: instead of processing the data in bulk, process it
 # incrementally in windows of the given size.

@@ -88,19 +88,17 @@
 test_dataset = ray.data.from_items([{"x": x} for x in range(2, 32, 3)])
 
 batch_predictor = BatchPredictor.from_checkpoint(checkpoint, XGBoostPredictor)
-predicted_labels = batch_predictor.predict(test_dataset)
-print(predicted_labels.to_pandas())
-#    predictions
-# 0     0.098437
-# 1     5.604667
-# 2    11.405312
-# 3    15.684700
-# 4    23.990948
-# 5    29.900211
-# 6    34.599442
-# 7    40.696899
-# 8    45.681076
-# 9    50.290031
+predicted_probabilities = batch_predictor.predict(test_dataset)
+predicted_probabilities.show()
+# {'predictions': 0.09843720495700836}
+# {'predictions': 5.604666709899902}
+# {'predictions': 11.405311584472656}
+# {'predictions': 15.684700012207031}
+# {'predictions': 23.990947723388672}
+# {'predictions': 29.900211334228516}
+# {'predictions': 34.59944152832031}
+# {'predictions': 40.6968994140625}
+# {'predictions': 45.68107604980469}
 # __predictor_end__
 
 

@@ -105,7 +105,7 @@ def train_func(config: dict):
 
 predictions = batch_predictor.predict(prediction_dataset, dtype=tf.float32)
 
-pandas_predictions = predictions.to_pandas(float("inf"))
+print("PREDICTIONS")
+predictions.show()
 
-print(f"PREDICTIONS\n{pandas_predictions}")
 # __air_tf_batchpred_end__
@@ -59,17 +59,11 @@
 
 batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, XGBoostPredictor)
 
-predicted_labels = (
-    batch_predictor.predict(test_dataset)
-    .map_batches(lambda df: (df > 0.5).astype(int), batch_format="pandas")
-    .to_pandas(limit=float("inf"))
-)
-print("PREDICTED LABELS")
-print(f"{predicted_labels}")
+predicted_probabilities = batch_predictor.predict(test_dataset)
+print("PREDICTED PROBABILITIES")
+predicted_probabilities.show()
 
-shap_values = batch_predictor.predict(test_dataset, pred_contribs=True).to_pandas(
-    limit=float("inf")
-)
+shap_values = batch_predictor.predict(test_dataset, pred_contribs=True)
 print("SHAP VALUES")
-print(f"{shap_values}")
+shap_values.show()
 # __air_xgb_batchpred_end__