Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial Implementation of a Data class #177

Merged
merged 9 commits into from
Jan 29, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 51 additions & 9 deletions exa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,21 @@
try:
_app.parse_command_line(sys.argv)
except SystemExit:
# _app fails to parse pytest command line
# so just pass the failure in this case.
pass
_base = os.path.dirname(__file__)
_base = os.path.abspath(os.path.dirname(__file__))
_path = os.path.join(_base, 'conf', 'config.py')
_app.load_config_file(_path)


class Base:
"""This base class provides a configured
"""This base mixin class provides a configured
log property and access to configuration
driven application settings without
forcing subclasses to be run explicitly
in the context of an application.
in the context of an application. It expects
to be mixed with a traitlets.config.Configurable
"""

@property
Expand All @@ -39,16 +42,31 @@ def log(self):
])
return logging.getLogger(name)

def traits(self, *args, **kws):
    """Return the traits reported by the parent class,
    leaving out the entries that belong to the traitlets
    machinery itself ('parent' and 'config').
    """
    hidden = ('parent', 'config')
    visible = {}
    for key, trait in super().traits(*args, **kws).items():
        if key in hidden:
            continue
        visible[key] = trait
    return visible

def trait_items(self):
    """Return a dict mapping every name listed by
    ``self.traits()`` to its current value on this object.
    """
    values = {}
    for trait_name in self.traits():
        values[trait_name] = getattr(self, trait_name)
    return values

def __init__(self, *args, **kws):
kws.pop('config', None)
# Allow over-writing config for dynamic
# classes at runtime
config = kws.pop('config', _app.config)
super().__init__(
*args, config=_app.config, **kws
*args, config=config, **kws
)


class Cfg(Base, Configurable):
logdir = Unicode().tag(config=True)
logname = Unicode().tag(config=True)
staticdir = Unicode()

@validate('logdir')
def _validate_logdir(self, prop):
Expand All @@ -61,6 +79,29 @@ def _default_logdir(self):
base = os.path.expanduser('~')
return os.path.join(base, '.exa')

@default('staticdir')
def _default_staticdir(self):
    """Default static directory: the ``static`` folder
    located under the package base directory.
    """
    static_root = os.path.join(_base, "static")
    return static_root

def resource(self, name):
    """Return the full path of a named resource
    in the static directory.

    The static directory is searched recursively.
    **name** may be a bare file name or a relative
    path whose leading directories disambiguate
    files sharing the same base name. A file matches
    when its path relative to the static directory
    is, or ends with, **name**.

    .. code-block:: python

        import exa
        exa.cfg.resource("myfile")
        exa.cfg.resource("test01/test.txt")
        exa.cfg.resource("test02/test.txt")

    Args:
        name (str): file name, optionally prefixed
            by one or more directories

    Returns:
        str: absolute path of the first match, or
        None when no file matches
    """
    if not name:
        return None
    # normalize so "dir/file" is compared with the
    # platform separator that os.walk produces
    wanted = os.path.normpath(name)
    filename = os.path.basename(wanted)
    for path, _, files in os.walk(self.staticdir):
        if filename not in files:
            continue
        found = os.path.join(path, filename)
        relative = os.path.relpath(found, self.staticdir)
        # match on whole path components; a bare file
        # name always satisfies this suffix test
        if relative == wanted or relative.endswith(os.sep + wanted):
            return os.path.abspath(found)
    return None


cfg = Cfg()
_path = os.path.join(_base, 'conf', 'logging.yml')
Expand All @@ -70,9 +111,10 @@ def _default_logdir(self):
_log['handlers']['file']['filename'] = _path
logging.config.dictConfig(_log)

from .core import Data
from ._version import __version__
from .core import (DataFrame, Series, Field3D, Field, Editor, Container,
TypedMeta, SparseDataFrame)

from .core import Data, Isotopes

#from ._version import __version__
#from .core import (DataFrame, Series, Field3D, Field, Editor, Container,
# TypedMeta, SparseDataFrame)
#
18 changes: 18 additions & 0 deletions exa/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015-2019, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
Useful pytest fixtures
#######################################
"""

import pytest

import exa


@pytest.fixture(scope='session')
def isotopes():
    """Provide an ``exa.Isotopes`` object whose data has
    already been requested once, so the result is cached
    on the object before it is handed to tests.
    """
    database = exa.Isotopes()
    # trigger the lazy load up front
    database.data()
    return database
8 changes: 4 additions & 4 deletions exa/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# Copyright (c) 2015-2019, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0

#from .numerical import DataFrame, Series, Field, Field3D, SparseDataFrame
#from .editor import Editor
#from .container import Container, TypedMeta
from .numerical import DataFrame, Series, Field, Field3D, SparseDataFrame
from .editor import Editor
from .container import Container, TypedMeta

from .data import Data
from .data import Data, Isotopes
103 changes: 99 additions & 4 deletions exa/core/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,106 @@
Data
########
"""
from traitlets import Unicode, Integer, Float
from traitlets import Unicode, Instance, Integer, Float, Any
from traitlets import validate, default, observe
from traitlets import TraitError
from traitlets.config import Configurable
import pandas as pd

from exa import Base
import exa


class Data(Base, Configurable):
class Data(exa.Base, Configurable):
"""An interface to separate data provider routing
logic and simplify managing multiple data concepts
in the container.
"""
myvar = Integer(5).tag(config=True)
name = Unicode()
source = Any(allow_none=True)
# TODO : port the concept of _index,
# _categories, _columns to
# traits and set up validators
# to get back strong-typing behavior
# inside of the dataframe
# this likely involves making _data a
# first class trait so we can observe
# when it is updated.

@validate('source')
def _validate_source(self, prop):
"""source must implement __call__"""
if not callable(prop['value']):
raise TraitError("source must be callable")
return prop['value']

@observe('source')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really like this use of these event triggers.

def _observe_source(self, prop):
"""Invalidate the stored data if source changes"""
self._data = None

@validate('name')
def _validate_name(self, prop):
return prop['value'].lower()

@default('name')
def _default_name(self):
return self.__class__.__name__

def data(self, df=None):
    """Get (and optionally set) the data held by this object.

    When **df** is given it replaces the stored data and is
    returned. When nothing is stored yet and a source is
    configured, the source is called once and its result is
    cached. With neither, None is returned.
    """
    # explicit data wins over anything cached or lazily loaded
    if df is not None:
        self._data = df
        return self._data
    # TODO: reintroduce the automated getattr(attr, getter_attr())
    #       with _getter_prefix behavior on a base class that
    #       providers inherit from; or implement __call__ on
    #       sources to manage that per provider (the source
    #       validator currently forces the latter). Supporting
    #       arbitrary callables needs source *args and **kws.
    nothing_cached = self._data is None
    if nothing_cached and self.source is not None:
        self._data = self.source()
    return self._data

def __init__(self, *args, df=None, **kws):
    """Create the data object.

    Args:
        df: optional data to store up front; returned by
            ``data()`` without consulting the source
        *args, **kws: forwarded to the parent constructor
            (trait values such as ``source`` and ``name``)
    """
    # make sure the cache attribute exists even when no
    # trait is assigned during construction
    self._data = None
    super().__init__(*args, **kws)
    # store df last: assigning ``source`` through kws notifies
    # _observe_source, which resets _data and would otherwise
    # discard a df supplied together with a source
    self._data = df



def load_isotopes():
    """Example data provider for the Data API.

    Reads the packaged ``isotopes.json`` resource into a
    DataFrame, assigns the column labels and returns the
    rows ordered by symbol and then A, with a fresh index.
    """
    labels = ('A', 'Z', 'af', 'afu',
              'cov_radius', 'van_radius', 'g',
              'mass', 'massu', 'name', 'eneg',
              'quad', 'spin', 'symbol', 'color')
    frame = pd.read_json(exa.cfg.resource('isotopes.json'),
                         orient='values')
    frame.columns = labels
    ordered = frame.sort_values(by=['symbol', 'A'])
    return ordered.reset_index(drop=True)

class Isotopes(Data):
    """Data object backed by the isotopes database.

    Unless another ``source`` is passed, the data is
    provided by ``load_isotopes``.

    .. code-block:: python

        import exa
        df = exa.Isotopes().data()
    """

    def __init__(self, *args, **kws):
        # fall back to the packaged loader only when the
        # caller did not choose a source
        kws.setdefault('source', load_isotopes)
        super().__init__(*args, **kws)
tjduigna marked this conversation as resolved.
Show resolved Hide resolved
29 changes: 29 additions & 0 deletions exa/core/tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015-2019, Exa Analytics Development Team
# Distributed under the terms of the Apache License 2.0
"""
Tests for :mod:`~exa.core.data`
#######################################
"""

import pytest

import exa


def test_data():
    """A plain Data object without df or source holds nothing."""
    blank = exa.Data()
    stored = blank.data()
    assert stored is None


def test_isotopes():
    """The default isotopes source yields a non-empty frame."""
    frame = exa.Isotopes().data()
    assert not frame.empty


def test_compare(isotopes):
    """Selected columns of the new isotopes data must equal
    those produced by ``exa.util.isotopes.as_df()``.
    """
    from exa.util import isotopes as legacy
    expected = legacy.as_df()
    actual = isotopes.data()
    columns = ['symbol', 'Z', 'cov_radius',
               'van_radius', 'color']
    assert actual[columns].equals(expected[columns])