Skip to content

Commit

Permalink
Merge pull request #106 from synthicity/use-pandana
Browse files Browse the repository at this point in the history
Use pandana
  • Loading branch information
fscottfoti committed Sep 2, 2014
2 parents 0b3cf9c + a382bb9 commit e5e2037
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 117 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ install:
- sudo conda init
- sudo conda update --yes conda
- |
conda create -p $HOME/py --yes ipython-notebook jinja2 matplotlib numpy pandas patsy pip scipy statsmodels pytables pytest pyyaml toolz "python=$TRAVIS_PYTHON_VERSION"
conda create -p $HOME/py --yes ipython-notebook jinja2 matplotlib numpy pandas patsy pip scipy statsmodels pandana pytables pytest pyyaml toolz "python=$TRAVIS_PYTHON_VERSION" -c "synthicity"
- export PATH=$HOME/py/bin:$PATH
- pip install simplejson bottle
- pip install pytest-cov coveralls pep8
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,8 @@
'statsmodels>=0.5.0',
'tables>=3.1.0',
'toolz>=0.7.0'
]
],
extras_require = {
'pandana': ['pandana>=0.1']
}
)
129 changes: 14 additions & 115 deletions urbansim/utils/networks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import cPickle
import yaml

import numpy as np
Expand All @@ -11,46 +10,32 @@

logger = logging.getLogger(__name__)

NETWORKS = None


def from_yaml(cfgname):
def from_yaml(net, cfgname):
print "Computing accessibility variables"
cfg = yaml.load(open(misc.config(cfgname)))

nodes = pd.DataFrame(index=NETWORKS.external_nodeids)
nodes = pd.DataFrame(index=net.node_ids)

node_col = cfg.get('node_col', None)
assert "node_col" in cfg, "Need to specify from where to take the node id"
node_col = cfg.get('node_col')

for variable in cfg['variable_definitions']:

name = variable["name"]
print "Computing %s" % name

decay = {
"exponential": "DECAY_EXP",
"linear": "DECAY_LINEAR",
"flat": "DECAY_FLAT"
}.get(variable.get("decay", "linear"))

agg = {
"sum": "AGG_SUM",
"average": "AGG_AVE",
"stddev": "AGG_STDDEV"
}.get(variable.get("aggregation", "sum"))

decay = variable.get("decay", "linear")
agg = variable.get("aggregation", "sum")
vname = variable.get("varname", None)

radius = variable["radius"]

dfname = variable["dataframe"]

flds = [vname] if vname else []
if 'add_fields' in variable:
flds += variable['add_fields']
if node_col:
flds.append(node_col)
logger.info(" Fields available to accvar =", ', '.join(flds))
flds.append(node_col)
if "filters" in variable:
flds += util.columns_in_filters(variable["filters"])
logger.info(" Fields available to aggregate =", ', '.join(flds))

df = sim.get_table(dfname).to_frame(flds)

Expand All @@ -62,98 +47,12 @@ def from_yaml(cfgname):
logger.info(" radius = %s, aggregation = %s, decay = %s" % (
radius, agg, decay))

nodes[name] = NETWORKS.accvar(
df, radius, node_ids=node_col, agg=agg, decay=decay,
vname=vname).astype('float').values
# set the variable
net.set(df[node_col], variable=df[vname] if vname else None)
# aggregate it
nodes[name] = net.aggregate(radius, type=agg, decay=decay)

if "apply" in variable:
nodes[name] = nodes[name].apply(eval(variable["apply"]))

return nodes


class Networks:
    """Holds one or more pyaccess graphs and runs aggregation queries on them.

    Each graph is loaded from a pickled dict that provides the keys
    ``'nodeids'``, ``'nodes'``, ``'edges'`` and (by default) ``'edgeweights'``.

    Attributes set by the constructor:

    * ``pya`` - the ``PyAccess`` instance, or ``None`` when no filenames
      were given.
    * ``nodeids`` - list of ``(graph number, position)`` tuples, one per node.
    * ``external_nodeids`` - list with the ``'nodeids'`` entry of each graph.
    """

    # flatten_nodeids is used when there is one graph to make a list of nodeids
    # rather than a list of lists - it doesn't work right now unfortunately
    def __init__(self, filenames, factors, maxdistances, twoway,
                 impedances=None, flatten_nodeids=False):
        """Load every pickled network and precompute its range queries.

        Parameters
        ----------
        filenames : sequence of str
            Paths of the pickled network dicts, one per graph.  When empty
            (or None) no graph is created and ``pya`` stays ``None``.
        factors : sequence
            Per-graph multiplier applied to the impedance.
        maxdistances : sequence
            Per-graph distance handed to ``precomputeRange``.
        twoway : sequence
            Per-graph flag forwarded to ``createGraph``.
        impedances : sequence of str or None, optional
            Per-graph Python expression evaluated with the loaded dict bound
            to ``net``; ``None`` entries fall back to ``net['edgeweights']``.
        flatten_nodeids : bool, optional
            With a single graph, store its node ids directly in ``nodeids``.
        """
        # Define every attribute up front so that an instance built without
        # filenames can still be inspected instead of raising AttributeError.
        self.pya = None
        self.nodeids = []
        self.external_nodeids = []
        if not filenames:
            return

        from pyaccess.pyaccess import PyAccess
        self.pya = PyAccess()
        self.pya.createGraphs(len(filenames))
        if impedances is None:
            impedances = [None] * len(filenames)

        graph_specs = zip(filenames, factors, maxdistances, twoway, impedances)
        for num, (filename, factor, maxdistance, is_twoway, impedance) in \
                enumerate(graph_specs):
            # Close the handle as soon as the pickle has been read.
            with open(filename) as f:
                net = cPickle.load(f)
            if impedance is None:
                impedance = "net['edgeweights']"
            # NOTE(review): eval of a caller-supplied expression (it may
            # reference the local name `net`); only safe with trusted input.
            impedance = eval(impedance)
            self.pya.createGraph(
                num, net['nodeids'], net['nodes'], net['edges'],
                impedance * factor, twoway=is_twoway)
            if len(filenames) == 1 and flatten_nodeids:
                self.nodeids = net['nodeids']
            else:
                # these are the internal ids
                self.nodeids += zip([num] * len(net['nodeids']),
                                    range(len(net['nodeids'])))
            self.external_nodeids.append(net['nodeids'])
            self.pya.precomputeRange(maxdistance, num)

    def accvar(self, df, distance, node_ids=None, xname='x', yname='y',
               vname=None, agg="AGG_SUM", decay="DECAY_LINEAR"):
        """Aggregate a variable around every node of every graph.

        Parameters
        ----------
        df : pandas.DataFrame
            Rows to aggregate.
        distance : number
            Distance passed to the aggregation query.
        node_ids : None, str, list or array-like, optional
            ``None`` looks nodes up from the ``xname``/``yname`` columns; a
            string names the column of ``df`` holding node ids (rows with a
            missing id are dropped); anything else is used as given.
        xname, yname : str, optional
            Coordinate columns used when ``node_ids`` is None.
        vname : str, optional
            Column to aggregate (rows where it is missing are dropped).
            When omitted every row counts as 1.
        agg, decay : str or object, optional
            Strings are resolved as attribute names on the pyaccess object.

        Returns
        -------
        pandas.Series
            Per-graph results concatenated, indexed by a MultiIndex built
            from ``self.nodeids``.

        Raises
        ------
        AssertionError
            If no graphs were loaded.
        """
        # need to generate pyaccess first
        assert self.pya, "no pyaccess graphs have been loaded"
        pya = self.pya
        if isinstance(agg, str):
            agg = getattr(pya, agg)
        if isinstance(decay, str):
            decay = getattr(pya, decay)
        if vname:
            df = df.dropna(subset=[vname])
        if node_ids is None:
            xys = np.array(df[[xname, yname]], dtype="float32")
            node_ids = [pya.XYtoNode(xys, distance=1000, gno=gno)
                        for gno in range(pya.numgraphs)]
        if isinstance(node_ids, str):
            n_before = len(df)
            df = df.dropna(subset=[node_ids])
            n_dropped = n_before - len(df)
            # dropna can only shrink the frame, so the number of removed rows
            # is before - after (the reverse difference is never positive).
            if n_dropped > 0:
                print("Removed %d rows because there are no node_ids"
                      % n_dropped)
            node_ids = [df[node_ids].astype("int32").values]
        elif not isinstance(node_ids, list):
            node_ids = [node_ids]

        pya.initializeAccVars(1)
        num = 0
        if vname is not None:
            aggvar = df[vname].astype('float32')
        else:
            aggvar = np.ones(len(df.index), dtype='float32')
        pya.initializeAccVar(num, node_ids, aggvar, preaggregate=0)
        res = [pya.getAllAggregateAccessibilityVariables(
                   distance, num, agg, decay, gno=gno)
               for gno in range(pya.numgraphs)]
        return pd.Series(
            np.concatenate(res), index=pd.MultiIndex.from_tuples(self.nodeids))

    def addnodeid(self, df):
        """Add node id columns to ``df`` in place and return it.

        Coordinates come from the ``x``/``y`` columns, falling back to
        ``X``/``Y``.  One ``_node_id<N>`` column is written per graph, plus
        ``_node_id`` which maps graph 0's ids through ``getGraphIDS()``.
        """
        try:
            xys = np.array(df[['x', 'y']], dtype="float32")
        except KeyError:
            # Lower-case columns are absent; try the upper-case spelling.
            xys = np.array(df[['X', 'Y']], dtype="float32")

        for gno in range(self.pya.numgraphs):
            df['_node_id%d' % gno] = pd.Series(
                self.pya.XYtoNode(xys, gno=gno), index=df.index)
        # assign the external id as well
        df['_node_id'] = pd.Series(
            self.pya.getGraphIDS()[df['_node_id0'].values], index=df.index)
        return df
Binary file added urbansim/utils/tests/osm_sample.h5
Binary file not shown.
89 changes: 89 additions & 0 deletions urbansim/utils/tests/test_networks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os.path

import numpy as np
import pandas as pd
import pytest
import tempfile

from ...sim import simulation as sim
import pandana as pdna
from .. import networks


@pytest.fixture(scope="module")
def sample_osm(request):
    """Module-scoped pandana network built from the bundled osm_sample.h5.

    The HDF store is opened read-only, a ``pdna.Network`` is built from its
    ``nodes`` and ``edges`` tables, and a range of 500 is precomputed.  The
    store is closed when the fixture is torn down.
    """
    store = pd.HDFStore(
        os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r")
    # Register the close before doing any further work so the store is
    # released even if building or precomputing the network raises.
    request.addfinalizer(store.close)

    nodes, edges = store.nodes, store.edges
    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to,
                       edges[["weight"]])
    net.precompute(500)

    return net


@pytest.fixture
def test_file(request):
    """Return the path of a temporary ``.yaml`` file, removed on teardown.

    The file is created empty; the test is expected to open it for writing.
    """
    # mkstemp creates the file atomically, so the name cannot be claimed by
    # another process between choosing it and using it (taking `.name` from a
    # discarded NamedTemporaryFile only borrows a name that is freed again).
    fd, name = tempfile.mkstemp(suffix='.yaml')
    os.close(fd)

    def cleanup():
        if os.path.exists(name):
            os.remove(name)
    request.addfinalizer(cleanup)

    return name


@pytest.fixture()
def sample_df(sample_osm):
    """Build a 500-row frame of random values attached to random nodes.

    ``_node_id`` is drawn (with numpy's default replacement) from the
    network's ``node_ids``; ``test_col_name`` holds ``np.random.random``
    draws.
    """
    n_samples = 500
    # Draw the node ids before the values, keeping the order of RNG calls.
    node_draws = np.random.choice(sample_osm.node_ids, n_samples)
    values = np.random.random(n_samples)
    return pd.DataFrame({"test_col_name": values,
                         "_node_id": node_draws})


def test_networks_yaml(sample_osm, sample_df, test_file):
    """Run ``networks.from_yaml`` on a one-variable config and check the result.

    Registers ``sample_df`` as the ``testing_df`` table, writes the YAML
    config to ``test_file``, and asserts on the shape and summary statistics
    of the returned per-node frame.
    """

    @sim.table_source('testing_df')
    def source():
        return sample_df

    # The keys of a variable definition (dataframe, varname, radius, apply,
    # filters) must be nested under its list item: from_yaml reads them from
    # each entry of `variable_definitions`, not from the top level.
    s = """
name: networks
desc: Neighborhood Accessibility Variables
model_type: networks
node_col: _node_id
variable_definitions:
  - name: test_attr
    dataframe: testing_df
    varname: test_col_name
    radius: 500
    apply: np.log1p
    filters:
    - test_col_name > .1
"""

    # Context manager guarantees the config is flushed and closed before
    # from_yaml opens it, even if the write fails.
    with open(test_file, "w") as f:
        f.write(s)

    df = networks.from_yaml(sample_osm, test_file)

    assert len(df) == 1498
    stats = df.describe()['test_attr']
    assert stats['max'] > 0
    assert stats['min'] == 0
    assert stats['std'] > 0
    ind = pd.Series(df.index).describe()
    assert ind.loc['min'] > 0
    assert ind.loc['count'] == 1498

0 comments on commit e5e2037

Please sign in to comment.