Skip to content

Commit

Permalink
Add exists() and size() methods on ItsdbProfile
Browse files Browse the repository at this point in the history
Resolves #112
  • Loading branch information
goodmami committed May 31, 2017
1 parent e3480b4 commit 398fc54
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 21 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@
## [Unreleased][unreleased]

### Added

* `delphin.itsdb.ItsdbProfile.exists()` (#112)
* `delphin.itsdb.ItsdbProfile.size()` (#112)

### Changed
### Removed
### Fixed

* Properly call `re.sub()` so the flags don't become the count (#108)
* Include file size of gzipped tables in summary of `delphin mkprof` (#110)

### Deprecated

Expand Down
90 changes: 71 additions & 19 deletions delphin/itsdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,33 +205,43 @@ def unescape(string):
return re.sub(r'(\\s|\\n|\\\\)', _unescape, string, flags=re.UNICODE)


def _table_filename(tbl_filename):
    """
    Resolve *tbl_filename* to the table file that should be read.

    Both the plaintext path and its gzipped counterpart (the same
    path with a `.gz` suffix) are considered, regardless of which of
    the two forms was given. If both files exist, the gzipped one is
    chosen only when its modification time is strictly newer than
    that of the plaintext one; otherwise the plaintext file is used.
    The chosen path is returned. An ItsdbError is raised if neither
    file exists.
    """
    if tbl_filename.endswith('.gz'):
        gzfn = tbl_filename
        txfn = tbl_filename[:-3]
    else:
        txfn = tbl_filename
        gzfn = tbl_filename + '.gz'

    if os.path.exists(txfn):
        if (os.path.exists(gzfn) and
                os.stat(gzfn).st_mtime > os.stat(txfn).st_mtime):
            tbl_filename = gzfn
        else:
            tbl_filename = txfn
    elif os.path.exists(gzfn):
        tbl_filename = gzfn
    else:
        # report the plaintext name so the message never reads
        # "name.gz(.gz)" when the caller passed the gzipped path
        raise ItsdbError(
            'Table does not exist at {}(.gz)'
            .format(txfn)
        )

    return tbl_filename


@contextmanager
def _open_table(tbl_filename):
    """
    Open the table at *tbl_filename* for reading as text.

    The file to open is selected by `_table_filename()`, so
    *tbl_filename* may name either the plaintext or the gzipped
    variant. This is a context manager that yields the open file
    object and closes it on exit. An ItsdbError is raised (by
    `_table_filename()`) if no matching file exists.
    """
    path = _table_filename(tbl_filename)
    # always open the resolved path: the name the caller passed may be
    # the variant that does not exist (or is the older of the two)
    if path.endswith('.gz'):
        # text mode only from py3.3; until then use TextIOWrapper
        with TextIOWrapper(
            BufferedReader(gzopen(path, mode='r'))
        ) as f:
            yield f
    else:
        with open(path) as f:
            yield f


def _write_table(profile_dir, table_name, rows, fields,
Expand Down Expand Up @@ -483,7 +493,7 @@ def __init__(self, path, filters=None, applicators=None, index=True):
)

if self._tables is None:
self._tables = list(self.relations.keys())
self._tables = list(self.relations)

self.filters = defaultdict(list)
self.applicators = defaultdict(list)
Expand Down Expand Up @@ -721,6 +731,48 @@ def write_profile(self, profile_directory, relations_filename=None,
elif os.path.exists(fn) or os.path.exists(fn + '.gz'):
logging.info('Ignoring "{}" table.'.format(table))

def exists(self, table=None):
    """
    Report whether the profile, or one of its tables, is on disk.

    The profile itself counts as existing when its root is a
    directory and a relations file is found inside it; if either
    check fails the result is False, whatever *table* is. When
    *table* is `None`, passing those two checks is enough for True.
    When *table* is given, the table's file (plaintext or gzipped)
    must also be found; its contents are not inspected, so an empty
    file still counts.
    """
    root = self.root
    if not os.path.isdir(root):
        return False
    relations_path = os.path.join(root, _relations_filename)
    if not os.path.isfile(relations_path):
        return False
    if table is None:
        return True
    # the lookup raises ItsdbError when no file for the table is found
    try:
        _table_filename(os.path.join(root, table))
    except ItsdbError:
        return False
    return True

def size(self, table=None):
    """
    Compute the on-disk size, in bytes, of the profile or *table*.

    When *table* is `None`, the sizes of all tables listed in the
    profile's relations are added up. Otherwise only the file for
    *table* is measured; a table with no file contributes 0.

    Note: for a gzipped table the compressed file size is reported.
    """
    if table is None:
        return sum(self.size(name) for name in self.relations)
    # a table without a file on disk counts as zero bytes
    try:
        path = _table_filename(os.path.join(self.root, table))
    except ItsdbError:
        return 0
    return os.stat(path).st_size


class ItsdbSkeleton(ItsdbProfile):
"""
Expand Down
3 changes: 1 addition & 2 deletions delphin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,7 @@ def mkprof(args):
fmt = '{:>8} bytes\t{}'
prof = itsdb.ItsdbProfile(outdir, index=False)
relations = prof.relations
tblsort = lambda t: (t[1] not in set(['item', 'item-set', 'fold']), t[0])
tables = [tbl for i, tbl in sorted(enumerate(relations), key=tblsort)]
tables = [tbl for i, tbl in sorted(enumerate(relations))]
for filename in ['relations'] + tables:
f = os.path.join(outdir, filename)
if os.path.isfile(f):
Expand Down

0 comments on commit 398fc54

Please sign in to comment.