diff --git a/awkward/array/base.py b/awkward/array/base.py index 7b5ab006..6129096d 100644 --- a/awkward/array/base.py +++ b/awkward/array/base.py @@ -4,6 +4,10 @@ import types import numbers +try: + from collections.abc import Iterable +except ImportError: + from collections import Iterable import numpy @@ -303,20 +307,20 @@ def _util_toarray(cls, value, defaultdtype, passthrough=None): @classmethod def _util_arraystr_draw(cls, x): - if isinstance(x, list): + if isinstance(x, tuple): + return "(" + ", ".join(cls._util_arraystr_draw(y) for y in x) + ")" + elif isinstance(x, Iterable): if len(x) > 6: return "[" + " ".join(cls._util_arraystr_draw(y) for y in x[:3]) + " ... " + " ".join(cls._util_arraystr_draw(y) for y in x[-3:]) + "]" else: return "[" + " ".join(cls._util_arraystr_draw(y) for y in x) + "]" - elif isinstance(x, tuple): - return "(" + ", ".join(cls._util_arraystr_draw(y) for y in x) + ")" else: return repr(x) @classmethod def _util_arraystr(cls, array): if isinstance(array, cls.numpy.ndarray): - return cls._util_arraystr_draw(array.tolist()) + return cls._util_arraystr_draw(array) elif isinstance(array, AwkwardArray): return str(array).replace("\n", "") else: @@ -405,6 +409,8 @@ def _concatenate_axis1(cls, arrays): def _util_isstringslice(cls, where): if isinstance(where, awkward.util.string): return True + elif isinstance(where, bytes): + raise TypeError("column selection must be str, not bytes, in Python 3") elif isinstance(where, tuple): return False elif isinstance(where, (cls.numpy.ndarray, AwkwardArray)) and issubclass(where.dtype.type, (numpy.str, numpy.str_)): diff --git a/awkward/array/table.py b/awkward/array/table.py index 46f887ad..7d7e3c25 100644 --- a/awkward/array/table.py +++ b/awkward/array/table.py @@ -2,6 +2,7 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-array/blob/master/LICENSE +import numbers import re import types from collections import OrderedDict @@ -10,6 +11,8 @@ except ImportError: from collections import Iterable +import numpy + import awkward.array.base import awkward.type import awkward.util @@ -38,7 +41,7 @@ def __repr__(self): elif getattr(self._table, "_showdict", False): return "<{0} {{{1}}}>".format(self._table._rowname, ", ".join("{0}: {1}".format(repr(n), str(self[n])) for n in self._table._contents)) else: - return "<{0} {1}>".format(self._table._rowname, self._index) + return "<{0} {1}>".format(self._table._rowname, self._index + self._table.rowstart) def __contains__(self, name): return name in self._table._contents @@ -181,6 +184,7 @@ def __init__(self, columns1={}, *columns2, **columns3): self._view = None self._base = None self.rowname = "Row" + self.rowstart = None self._contents = OrderedDict() seen = set() @@ -229,6 +233,22 @@ def rowname(self, value): raise TypeError("rowname must be a string") self._rowname = value + @property + def rowstart(self): + if self._rowstart is not None: + return self._rowstart + elif self._base is not None: + return self._base.rowstart + else: + return 0 + + @rowstart.setter + def rowstart(self, value): + if self.check_prop_valid: + if value is not None and not isinstance(value, (numbers.Integral, numpy.integer)): + raise TypeError("rowstart must be None or an integer") + self._rowstart = value + @classmethod def fromrec(cls, recarray): if not isinstance(recarray, cls.numpy.ndarray) or recarray.dtype.names is None: @@ -239,10 +259,11 @@ def fromrec(cls, recarray): return out @classmethod - def frompairs(cls, pairs): + def frompairs(cls, pairs, rowstart): out = cls() for n, x in pairs: out[n] = x + out._rowstart = rowstart return out @classmethod @@ -255,12 +276,14 @@ def fromview(cls, view, base): out = base.copy() out._view = int(start), int(step), int(length) out._base = base + out._rowstart = None return out elif isinstance(view, cls.numpy.ndarray) and cls._util_isintegertype(view.dtype.type): out = base.copy() out._view = view out._base = base + out._rowstart = None return out else: @@ -270,6 +293,7 @@ def copy(self, contents=None): out = self.__class__.__new__(self.__class__) out._view = self._view out._base = self._base + out._rowstart = self._rowstart out._rowname = self._rowname out._contents = self._contents if contents is not None and isinstance(contents, dict): @@ -285,12 +309,14 @@ def deepcopy(self, contents=None): out._contents = OrderedDict([(n, self._util_deepcopy(x[out._index()])) for n, x in out._contents.items()]) out._view = None out._base = None + out._rowstart = None return out def empty_like(self, **overrides): out = self.__class__.__new__(self.__class__) out._view = None out._base = None + out._rowstart = None out._rowname = self._rowname out._contents = OrderedDict() return out @@ -316,7 +342,8 @@ def ones_like(self, **overrides): def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs): self._valid() out = {"call": ["awkward", "Table", "frompairs"], - "args": [{"pairs": [[n, fill(x, "Table.contents", prefix, suffix, schemasuffix, storage, compression, **kwargs)] for n, x in self._contents.items()]}]} + "args": [{"pairs": [[n, fill(x, "Table.contents", prefix, suffix, schemasuffix, storage, compression, **kwargs)] for n, x in self._contents.items()]}, + {"json": self.rowstart}]} if isinstance(self._view, tuple): start, step, length = self._view out = {"call": ["awkward", "Table", "fromview"], @@ -509,10 +536,17 @@ def __iter__(self, checkiter=True): def __getitem__(self, where): if self._util_isstringslice(where): if isinstance(where, awkward.util.string): - try: - return self._contents[where][self._index()] - except KeyError: - raise ValueError("no column named {0}".format(repr(where))) + if self._view is None: + try: + return self._contents[where] + except KeyError: + raise ValueError("no column named {0}".format(repr(where))) + else: + index = self._index() + try: + return self._contents[where][index] + except KeyError: + raise ValueError("no column named {0}".format(repr(where))) else: contents = OrderedDict() for n in where: @@ -540,6 +574,7 @@ def __getitem__(self, where): out = self.copy(contents=self._contents) out._view = newslice out._base = self + out._rowstart = None return out def __setitem__(self, where, what): diff --git a/awkward/arrow.py b/awkward/arrow.py index 1a20b976..6b4046c2 100644 --- a/awkward/arrow.py +++ b/awkward/arrow.py @@ -231,7 +231,7 @@ def popbuffers(tpe, buffers): pairs = [] for i in range(tpe.num_children - 1, -1, -1): pairs.insert(0, (tpe[i].name, popbuffers(tpe[i].type, buffers))) - out = awkwardlib.Table.frompairs(pairs) + out = awkwardlib.Table.frompairs(pairs, 0) # FIXME: better rowstart mask = buffers.pop() if mask is not None: mask = awkwardlib.numpy.frombuffer(mask, dtype=ARROW_BITMASKTYPE) @@ -446,9 +446,8 @@ def convert(obj, message): writer.close() class _ParquetFile(object): - def __init__(self, file, cache=None, metadata=None, common_metadata=None): + def __init__(self, file, metadata=None, common_metadata=None): self.file = file - self.cache = cache self.metadata = metadata self.common_metadata = common_metadata self._init() @@ -463,7 +462,6 @@ def __getstate__(self): def __setstate__(self, state): self.file = state["file"] - self.cache = None self.metadata = state["metadata"] self.common_metadata = state["common_metadata"] self._init() @@ -477,11 +475,11 @@ def tojson(self): @classmethod def fromjson(cls, state): - return cls(state["file"], cache=None, metadata=state["metadata"], common_metadata=state["common_metadata"]) + return cls(state["file"], metadata=state["metadata"], common_metadata=state["common_metadata"]) def fromparquet(file, awkwardlib=None, cache=None, persistvirtual=False, metadata=None, common_metadata=None): awkwardlib = awkward.util.awkwardlib(awkwardlib) - parquetfile = _ParquetFile(file, cache=cache, metadata=metadata, common_metadata=common_metadata) + parquetfile = _ParquetFile(file, metadata=metadata, common_metadata=common_metadata) columns = parquetfile.type.columns chunks = [] diff --git a/awkward/generate.py b/awkward/generate.py index 73e42357..8c588e27 100644 --- a/awkward/generate.py +++ b/awkward/generate.py @@ -227,7 +227,7 @@ def append(self, obj, tpe): return UnionFillable(self, self.awkwardlib).append(obj, tpe) def finalize(self, **options): - return self.awkwardlib.Table.frompairs((n, self.contents[n].finalize(**options)) for n in sorted(self.fields)) + return self.awkwardlib.Table.frompairs([(n, self.contents[n].finalize(**options)) for n in sorted(self.fields)], 0) class ObjectFillable(Fillable): __slots__ = ["content", "cls", "awkwardlib"] diff --git a/awkward/persist.py b/awkward/persist.py index 442203d4..b613d6d0 100644 --- a/awkward/persist.py +++ b/awkward/persist.py @@ -42,6 +42,9 @@ ["awkward.persist", "*"], ["awkward.arrow", "_ParquetFile", "fromjson"], ["uproot_methods.classes.*"], + ["uproot.tree._LazyFiles"], + ["uproot.tree._LazyTree"], + ["uproot.tree._LazyBranch"], ] def frompython(obj): diff --git a/awkward/version.py b/awkward/version.py index ae970d08..d96b0dca 100644 --- a/awkward/version.py +++ b/awkward/version.py @@ -4,7 +4,7 @@ import re -__version__ = "0.9.1" +__version__ = "0.10.0" version = __version__ version_info = tuple(re.split(r"[-\.]", __version__)) diff --git a/tests/test_arrow.py b/tests/test_arrow.py index bc1f55b9..2b8af5f9 100644 --- a/tests/test_arrow.py +++ b/tests/test_arrow.py @@ -414,9 +414,11 @@ def test_arrow_writeparquet2(tmpdir): assert len(c.chunks) == 1 and len(d.chunks) == 1 assert isinstance(c.chunks[0], awkward.Table) and isinstance(d.chunks[0], awkward.Table) assert c.chunks[0].columns == d.chunks[0].columns - assert isinstance(c.chunks[0]["x"], awkward.BitMaskedArray) and isinstance(d.chunks[0]["x"], awkward.BitMaskedArray) - assert c.chunks[0]["x"].boolmask().tolist() == d.chunks[0]["x"].boolmask().tolist() - assert isinstance(c.chunks[0]["x"].content, awkward.JaggedArray) and isinstance(d.chunks[0]["x"].content, awkward.JaggedArray) - assert isinstance(c.chunks[0]["x"].content.content, awkward.BitMaskedArray) and isinstance(d.chunks[0]["x"].content.content, awkward.BitMaskedArray) - assert c.chunks[0]["x"].content.content.boolmask().tolist() == d.chunks[0]["x"].content.content.boolmask().tolist() - assert isinstance(c.chunks[0]["x"].content.content.content, numpy.ndarray) and isinstance(d.chunks[0]["x"].content.content.content, numpy.ndarray) + cstuff = c.chunks[0]["x"][:] + dstuff = d.chunks[0]["x"][:] + assert isinstance(cstuff, awkward.BitMaskedArray) and isinstance(dstuff, awkward.BitMaskedArray) + assert cstuff.boolmask().tolist() == dstuff.boolmask().tolist() + assert isinstance(cstuff.content, awkward.JaggedArray) and isinstance(dstuff.content, awkward.JaggedArray) + assert isinstance(cstuff.content.content, awkward.BitMaskedArray) and isinstance(dstuff.content.content, awkward.BitMaskedArray) + assert cstuff.content.content.boolmask().tolist() == dstuff.content.content.boolmask().tolist() + assert isinstance(cstuff.content.content.content, numpy.ndarray) and isinstance(dstuff.content.content.content, numpy.ndarray)