-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #87 from Keviinplz/feature/df-overall-columns-semantics
[INRIA Internship] Dataframe Columns Usage
- Loading branch information
Showing
17 changed files
with
492 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,3 +95,9 @@ ENV/ | |
|
||
# generated docs | ||
/docs/_build/ | ||
|
||
# examples | ||
/src/lyra/tests/example.py | ||
|
||
# mac | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,141 @@ | ||
import itertools | ||
|
||
from lyra.abstract_domains.state import State | ||
from lyra.abstract_domains.usage.dataframe_usage_domain import DataFrameColumnUsageState | ||
from lyra.core.statements import Call | ||
from lyra.core.expressions import Subscription, Literal, VariableIdentifier, ListDisplay, BinaryArithmeticOperation, Input | ||
from lyra.core.statements import Call, SubscriptionAccess, SlicingAccess, VariableAccess | ||
from lyra.core.types import ( | ||
StringLyraType, | ||
ListLyraType, | ||
SetLyraType, | ||
DictLyraType, | ||
TupleLyraType, | ||
DataFrameLyraType, | ||
) | ||
from lyra.engine.interpreter import Interpreter | ||
from lyra.semantics.backward import DefaultPandasBackwardSemantics | ||
|
||
|
||
class DataFrameColumnUsageSemantics(DefaultPandasBackwardSemantics): | ||
"""Backward semantics of statements with support for Pandas library calls for dataframe column usage analysis.""" | ||
|
||
def drop_call_semantics(self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter) -> DataFrameColumnUsageState: | ||
def _summarized_view(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Shared handler for calls that only display a view of their receiver.

    Evaluates the first call argument and hands the resulting expressions
    to ``state.output``; whatever ``state.output`` returns is returned.

    :param stmt: call statement whose first argument is evaluated
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: the value returned by ``state.output``
    """
    receiver = stmt.arguments[0]
    evaluated = self.semantics(receiver, state, interpreter)
    return state.output(evaluated.result)
|
||
def subscription_access_semantics(
    self,
    stmt: SubscriptionAccess,
    state: DataFrameColumnUsageState,
    interpreter: Interpreter,
) -> DataFrameColumnUsageState:
    """Build the ``Subscription`` expressions denoted by a subscription access.

    Every (target, key) pair obtained from evaluating ``stmt.target`` and
    ``stmt.key`` is turned into one ``Subscription`` per key; a
    ``ListDisplay`` key contributes one ``Subscription`` per item.

    :param stmt: subscription access statement
    :param state: state before the access
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: ``state`` with ``result`` set to the collected subscriptions
    :raises NotImplementedError: if a target is not typed as a dataframe, or
        a key is neither a ``ListDisplay``, a ``Literal`` nor a
        ``VariableIdentifier``
    """
    primaries = self.semantics(stmt.target, state, interpreter).result
    indices = self.semantics(stmt.key, state, interpreter).result
    subscriptions = set()
    for primary, index in itertools.product(primaries, indices):
        # The target type is checked before the key kind, for every pair.
        if not isinstance(primary.typ, DataFrameLyraType):
            raise NotImplementedError(
                f"Semantics for subscription of {primary} is not yet implemented!"
            )
        if isinstance(index, ListDisplay):
            # df[["a", "b"]]-style access: one subscription per listed item.
            subscriptions.update(
                Subscription(primary.typ, primary, item) for item in index.items
            )
            continue
        if not isinstance(index, (Literal, VariableIdentifier)):
            raise NotImplementedError(
                f"Semantics for subscription of {primary} and {index} is not yet implemented!"
            )
        subscriptions.add(Subscription(primary.typ, primary, index))

    state.result = subscriptions
    return state
|
||
def drop_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``drop`` call on a dataframe.

    The first argument is evaluated to the dataframe expressions and the
    second to the column expressions; both are passed to
    ``state.drop_dataframe_column``.

    :param stmt: call statement (arguments 0 and 1 are read)
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: the value returned by ``state.drop_dataframe_column``
    """
    receiver, dropped = stmt.arguments[0], stmt.arguments[1]
    frames = self.semantics(receiver, state, interpreter).result
    labels = self.semantics(dropped, state, interpreter).result
    return state.drop_dataframe_column(frames, labels)
|
||
def head_call_semantics(self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter) -> DataFrameColumnUsageState: | ||
dataframes = self.semantics(stmt.arguments[0], state, interpreter).result | ||
return state.output(dataframes) | ||
def head_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``head`` call by delegating to ``_summarized_view``.

    :param stmt: call statement
    :param state: state before the call
    :param interpreter: interpreter forwarded to the helper
    :return: the state produced by ``_summarized_view``
    """
    summarized = self._summarized_view(stmt, state, interpreter)
    return summarized
|
||
def hist_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``hist`` call by delegating to ``_summarized_view``.

    :param stmt: call statement
    :param state: state before the call
    :param interpreter: interpreter forwarded to the helper
    :return: the state produced by ``_summarized_view``
    """
    summarized = self._summarized_view(stmt, state, interpreter)
    return summarized
|
||
def tail_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``tail`` call by delegating to ``_summarized_view``.

    :param stmt: call statement
    :param state: state before the call
    :param interpreter: interpreter forwarded to the helper
    :return: the state produced by ``_summarized_view``
    """
    summarized = self._summarized_view(stmt, state, interpreter)
    return summarized
|
||
def describe_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``describe`` call by delegating to ``_summarized_view``.

    :param stmt: call statement
    :param state: state before the call
    :param interpreter: interpreter forwarded to the helper
    :return: the state produced by ``_summarized_view``
    """
    summarized = self._summarized_view(stmt, state, interpreter)
    return summarized
|
||
def info_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle an ``info`` call by delegating to ``_summarized_view``.

    :param stmt: call statement
    :param state: state before the call
    :param interpreter: interpreter forwarded to the helper
    :return: the state produced by ``_summarized_view``
    """
    summarized = self._summarized_view(stmt, state, interpreter)
    return summarized
|
||
def min_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``min`` call: the call's result is its evaluated receiver.

    :param stmt: call statement whose first argument is evaluated
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: ``state`` with ``result`` set to a fresh set of the
        expressions the first argument evaluated to
    """
    receiver = self.semantics(stmt.arguments[0], state, interpreter).result
    # Copy into a new set so the result does not alias the evaluated one.
    state.result = set(receiver)
    return state
|
||
def max_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``max`` call: the call's result is its evaluated receiver.

    :param stmt: call statement whose first argument is evaluated
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: ``state`` with ``result`` set to a fresh set of the
        expressions the first argument evaluated to
    """
    receiver = self.semantics(stmt.arguments[0], state, interpreter).result
    # Copy into a new set so the result does not alias the evaluated one.
    state.result = set(receiver)
    return state
|
||
def median_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``median`` call: the call's result is its evaluated receiver.

    :param stmt: call statement whose first argument is evaluated
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: ``state`` with ``result`` set to a fresh set of the
        expressions the first argument evaluated to
    """
    receiver = self.semantics(stmt.arguments[0], state, interpreter).result
    # Copy into a new set so the result does not alias the evaluated one.
    state.result = set(receiver)
    return state
|
||
def fillna_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``fillna`` call: the call's result is its evaluated receiver.

    Only the first argument is evaluated; the remaining arguments are not
    inspected by this method.

    :param stmt: call statement whose first argument is evaluated
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: ``state`` with ``result`` set to a fresh set of the
        expressions the first argument evaluated to
    """
    receiver = self.semantics(stmt.arguments[0], state, interpreter).result
    # Copy into a new set so the result does not alias the evaluated one.
    state.result = set(receiver)
    return state
|
||
def replace_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``replace`` call: the call's result is its evaluated receiver.

    Only the first argument is evaluated; the remaining arguments are not
    inspected by this method.

    :param stmt: call statement whose first argument is evaluated
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: ``state`` with ``result`` set to a fresh set of the
        expressions the first argument evaluated to
    """
    receiver = self.semantics(stmt.arguments[0], state, interpreter).result
    # Copy into a new set so the result does not alias the evaluated one.
    state.result = set(receiver)
    return state
|
||
def read_csv_call_semantics(self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter) -> DataFrameColumnUsageState: | ||
return state # TODO | ||
def concat_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``concat`` call: the result is the union of the listed items.

    The second call argument is evaluated; each resulting expression must be
    a ``ListDisplay`` and its items are collected into the result set.

    :param stmt: call statement; argument 1 is expected to hold the sequence
        of dataframes (NOTE(review): presumably argument 0 is the ``pd``
        module receiver -- confirm against the call construction)
    :param state: state before the call
    :param interpreter: interpreter forwarded to ``self.semantics``
    :return: ``state`` with ``result`` set to the union of all list items
    :raises NotImplementedError: if an evaluated argument is not a
        ``ListDisplay`` (e.g. a mapping or a variable holding the sequence)
    """
    # Concat always receives a sequence (or mapping) of dataframes; only the
    # literal list form is supported here.
    candidates = self.semantics(stmt.arguments[1], state, interpreter).result
    collected = set()
    for candidate in candidates:
        if not isinstance(candidate, ListDisplay):
            # Report the offending expression itself (the previous message
            # interpolated the builtin ``list`` type by mistake).
            raise NotImplementedError(
                f"Semantics for concat of {candidate} is not yet implemented!"
            )
        collected.update(candidate.items)
    state.result = collected
    return state
|
||
def read_csv_call_semantics(
    self, stmt: Call, state: DataFrameColumnUsageState, interpreter: Interpreter
) -> DataFrameColumnUsageState:
    """Handle a ``read_csv`` call by modelling its value as program input.

    The call arguments are not evaluated.

    :param stmt: call statement (unused)
    :param state: state before the call
    :param interpreter: interpreter (unused)
    :return: ``state`` with ``result`` set to a singleton set holding an
        ``Input`` expression of string type
    """
    csv_input = Input(typ=StringLyraType())
    state.result = {csv_input}
    return state
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
""" | ||
Data Usage Analysis Dataframes - Unit Tests | ||
=========================================== | ||
:Authors: Caterina Urban and Kevin Pinochet | ||
""" | ||
|
||
|
||
import glob | ||
import os | ||
import unittest | ||
|
||
import sys | ||
|
||
from lyra.abstract_domains.usage.dataframe_usage_domain import DataFrameColumnUsageState | ||
from lyra.engine.backward import BackwardInterpreter | ||
from lyra.semantics.dataframe_usage_semantics import DataFrameColumnUsageSemantics | ||
from lyra.unittests.runner import TestRunner | ||
|
||
|
||
class UsageTest(TestRunner):
    """Test runner wiring the dataframe column usage analysis together."""

    def interpreter(self):
        """Return a backward interpreter using the dataframe usage semantics.

        NOTE(review): the trailing ``3`` is presumably the widening delay --
        confirm against ``BackwardInterpreter``.
        """
        return BackwardInterpreter(
            self.cfgs,
            self.fargs,
            DataFrameColumnUsageSemantics(),
            3,
        )

    def state(self):
        """Return the initial analysis state over the program variables."""
        return DataFrameColumnUsageState(self.variables)
|
||
def test_suite():
    """Collect one ``UsageTest`` per program under ``usage/dataframes``.

    The directory is resolved relative to the current working directory.
    ``__init__.py`` is skipped; the file name of every collected program is
    printed as it is added.

    :return: the populated ``unittest.TestSuite``
    """
    suite = unittest.TestSuite()
    pattern = f"{os.getcwd()}/usage/dataframes/**.py"
    for path in glob.iglob(pattern):
        if os.path.basename(path) == "__init__.py":
            continue
        print(os.path.basename(path))
        suite.addTest(UsageTest(path))
    return suite
|
||
|
||
if __name__ == '__main__':
    # Run the collected suite and signal failure through the exit status.
    outcome = unittest.TextTestRunner().run(test_suite())
    if not outcome.wasSuccessful():
        sys.exit(1)
Empty file.
12 changes: 12 additions & 0 deletions
12
src/lyra/unittests/usage/dataframes/handcraft_example_1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import pandas as pd | ||
|
||
# INITIAL: df -> {_: W} | ||
df: pd.DataFrame = pd.read_csv("...") | ||
|
||
# STATE: df -> {"id": N, "t": U, _: N} | ||
df.drop(['id'], axis=1, inplace=True) | ||
|
||
# STATE: df -> {"t": U, _: N} | ||
df["t"].head() | ||
|
||
# FINAL: df -> {_: N} |
Oops, something went wrong.