Skip to content

Commit

Permalink
Vectorize operations for propensity score matching (#1179)
Browse files — browse the repository at this point in the history
* Add vector operations

Add TODO comment

Signed-off-by: Rahul Shrestha <[email protected]>

Fix formatting

Signed-off-by: Rahul Shrestha <[email protected]>

Fix bug with string name

Signed-off-by: rahulbshrestha <[email protected]>

* Vectorize remaining list

Signed-off-by: rahulbshrestha <[email protected]>

---------

Signed-off-by: rahulbshrestha <[email protected]>
  • Loading branch information
rahulbshrestha authored Jun 4, 2024
1 parent 5d8fdd0 commit 72e3ba0
Showing 1 changed file with 16 additions and 14 deletions.
30 changes: 16 additions & 14 deletions dowhy/causal_estimators/propensity_score_matching_estimator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, List, Optional, Union

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

Expand Down Expand Up @@ -120,7 +121,7 @@ def estimate_effect(

# TODO remove neighbors that are more than a given radius apart

# estimate ATT on treated by summing over difference between matched neighbors
# Estimating the ATT as the mean difference in outcome between each treated unit and its nearest matched control unit
control_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
control[self.propensity_score_column].values.reshape(-1, 1)
)
Expand All @@ -129,27 +130,28 @@ def estimate_effect(
self.logger.debug(distances)

att = 0
numtreatedunits = treated.shape[0]
for i in range(numtreatedunits):
treated_outcome = treated.iloc[i][self._target_estimand.outcome_variable[0]].item()
control_outcome = control.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
att += treated_outcome - control_outcome
outcome_variable = self._target_estimand.outcome_variable[0]
treated_outcomes = treated[outcome_variable]
control_outcomes = list(control.iloc[indices.flatten()][outcome_variable])

att = (treated_outcomes - control_outcomes).mean()

att /= numtreatedunits
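# Estimating the ATC as the mean difference in outcome between each control unit's nearest matched treated unit and that control unit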

# Now computing ATC
treated_neighbors = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(
treated[self.propensity_score_column].values.reshape(-1, 1)
)
distances, indices = treated_neighbors.kneighbors(control[self.propensity_score_column].values.reshape(-1, 1))

atc = 0
numcontrolunits = control.shape[0]
for i in range(numcontrolunits):
control_outcome = control.iloc[i][self._target_estimand.outcome_variable[0]].item()
treated_outcome = treated.iloc[indices[i]][self._target_estimand.outcome_variable[0]].item()
atc += treated_outcome - control_outcome
outcome_variable = self._target_estimand.outcome_variable[0]
control_outcomes = control[outcome_variable]
treated_outcomes = list(treated.iloc[indices.flatten()][outcome_variable])

atc /= numcontrolunits
atc = (treated_outcomes - control_outcomes).mean()

numtreatedunits = treated.shape[0]
numcontrolunits = control.shape[0]

if target_units == "att":
est = att
Expand Down

0 comments on commit 72e3ba0

Please sign in to comment.