Skip to content

Commit

Permalink
Squash rearrange-reader
Browse files Browse the repository at this point in the history
    Remove python 2.7 from Travis-CI
    Rearrange-Reader: Enable Unknown section tests to pass
    Use TextIOWrapper.tell() to get section start pos
    Add initial LAS 3.0 test infrastructure
    - Add tests/examples/3.0 dir.
    - Add the CWLS's 3.0 example las file.
    - Copy the example file to sample_3.0.las to standardize with
      1.2 and 2.0 sample las files.
    - Create a tests/test_read_30.py with basic read test.
      However, the test is set to SKIP because it currently fails on the
      rearrange-reader branch
    First draft at isolated data section reader (kinverarity1#5)
    Now all header sections are parsed fully before returning
    to read data sections.
    Add find_sections_in_file()
    Rebase to master
  • Loading branch information
dcslagel committed Jul 8, 2020
1 parent 006a235 commit 3019d81
Show file tree
Hide file tree
Showing 4 changed files with 360 additions and 205 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: python
python:
- "2.7"
- "3.5"
- "3.6"
- "3.7"
Expand Down
331 changes: 171 additions & 160 deletions lasio/las.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def read(
read_policy="default",
null_policy="strict",
ignore_header_errors=False,
ignore_comments=("#",),
mnemonic_case="upper",
index_unit=None,
**kwargs
Expand All @@ -104,6 +105,8 @@ def read(
just the header metadata. False by default.
ignore_header_errors (bool): ignore LASHeaderErrors (False by
default)
ignore_comments (tuple/str): ignore comments beginning with characters
e.g. ``("#", '"')``
mnemonic_case (str): 'preserve': keep the case of HeaderItem mnemonics
'upper': convert all HeaderItem mnemonics to uppercase
'lower': convert all HeaderItem mnemonics to lowercase
Expand All @@ -114,178 +117,185 @@ def read(
"""

file_obj, self.encoding = reader.open_file(file_ref, **kwargs)

regexp_subs, value_null_subs, version_NULL = reader.get_substitutions(
read_policy, null_policy
)
logger.debug("Reading {}...".format(str(file_ref)))

file_obj = ''
try:
self.raw_sections = reader.read_file_contents(
file_obj, regexp_subs, value_null_subs, ignore_data=ignore_data
file_obj, self.encoding = reader.open_file(file_ref, **kwargs)

logger.debug(
"Fetching substitutions for read_policy {} and null policy {}".format(
read_policy, null_policy
)
)
regexp_subs, value_null_subs, version_NULL = reader.get_substitutions(
read_policy, null_policy
)
finally:
if hasattr(file_obj, "close"):
file_obj.close()

if len(self.raw_sections) == 0:
raise KeyError("No ~ sections found. Is this a LAS file?")
provisional_version = 2.0
provisional_wrapped = "YES"
provisional_null = None

def add_section(pattern, name, **sect_kws):
raw_section = self.match_raw_section(pattern)
drop = []
if raw_section:
self.sections[name] = reader.parse_header_section(
raw_section, **sect_kws
)
drop.append(raw_section["title"])
else:
logger.warning(
"Header section %s regexp=%s was not found." % (name, pattern)
section_positions = reader.find_sections_in_file(file_obj)
logger.debug("Found {} sections".format(len(section_positions)))
if len(section_positions) == 0:
raise KeyError("No ~ sections found. Is this a LAS file?")

data_section_indices = []
for i, (k, first_line, last_line, section_title) in enumerate(
section_positions
):
section_type = reader.determine_section_type(section_title)
logger.debug(
"Parsing {typ} section at lines {first_line}-{last_line} ({k} bytes) {title}".format(
typ=section_type,
title=section_title,
first_line=first_line + 1,
last_line=last_line + 1,
k=k,
)
)
for key in drop:
self.raw_sections.pop(key)

add_section(
"~V",
"Version",
version=1.2,
ignore_header_errors=ignore_header_errors,
mnemonic_case=mnemonic_case,
)
# Read traditional LAS header item section
if section_type == "Header items":
file_obj.seek(k)
sct_items = reader.parse_header_items_section(
file_obj,
line_nos=(first_line, last_line),
version=provisional_version,
ignore_header_errors=ignore_header_errors,
mnemonic_case=mnemonic_case,
ignore_comments=ignore_comments,
)

# Establish version and wrap values if possible.
# Update provisional statuses
if "VERS" in sct_items:
provisional_version = sct_items.VERS.value
if "WRAP" in sct_items:
provisional_wrapped = sct_items.WRAP.value
if "NULL" in sct_items:
provisional_null = sct_items.NULL.value

if section_title[1] == "V":
self.sections["Version"] = sct_items
elif section_title[1] == "W":
self.sections["Well"] = sct_items
elif section_title[1] == "C":
self.sections["Curves"] = sct_items
elif section_title[1] == "P":
self.sections["Parameter"] = sct_items
else:
self.sections[section_title[1:]] = sct_items

# Read free-text LAS header section
elif section_type == "Header (other)":
file_obj.seek(k)
line_no = first_line
contents = []
for line in file_obj:
if line.startswith('~'):
continue
line_no += 1
contents.append(line.strip("\n").strip())
if line_no == last_line:
break
sct_contents = "\n".join(contents)

if section_title[1] == "O":
self.sections["Other"] = sct_contents
else:
self.sections[section_title[1:]] = sct_contents

try:
version = self.version["VERS"].value
except KeyError:
logger.warning("VERS item not found in the ~V section.")
version = None
elif section_type == "Data":
logger.debug("Storing reference and returning later...")
data_section_indices.append(i)

try:
wrap = self.version["WRAP"].value
except KeyError:
logger.warning("WRAP item not found in the ~V section")
wrap = None

# Validate version.
#
# If VERS was missing and version = None, then the file will be read in
# as if version were 2.0. But there will be no VERS HeaderItem, meaning
# that las.write(..., version=None) will fail with a KeyError. But
# las.write(..., version=1.2) will work because a new VERS HeaderItem
# will be created.
if not ignore_data:
for k, first_line, last_line, section_title in [
section_positions[i] for i in data_section_indices
]:
logger.debug("Reading data section {}".format(section_title))

try:
assert version in (1.2, 2, None)
except AssertionError:
if version < 2:
version = 1.2
else:
version = 2
else:
if version is None:
logger.info("Assuming that LAS VERS is 2.0")
version = 2
file_obj.seek(k)
n_columns = reader.inspect_data_section(
file_obj, (first_line, last_line), regexp_subs
)

add_section(
"~W",
"Well",
version=version,
ignore_header_errors=ignore_header_errors,
mnemonic_case=mnemonic_case,
)
file_obj.seek(k)
arr = reader.read_data_section_iterative(
file_obj, (first_line, last_line), regexp_subs, value_null_subs
)
logger.debug("Read ndarray {arrshape}".format(arrshape=arr.shape))

# This is so we can check the data size and use self.set_data(data, truncate=False)
# in cases where data.size is zero.
data = arr

if data.size > 0:
# TODO: check whether this treatment of NULLs is correct
logger.debug("~A data {}".format(arr))
if version_NULL:
arr[arr == provisional_null] = np.nan
logger.debug(
"~A after NULL replacement data {}".format(arr)
)

# Establish NULL value if possible.

try:
null = self.well["NULL"].value
except KeyError:
logger.warning("NULL item not found in the ~W section")
null = None

add_section(
"~C",
"Curves",
version=version,
ignore_header_errors=ignore_header_errors,
mnemonic_case=mnemonic_case,
)
add_section(
"~P",
"Parameter",
version=version,
ignore_header_errors=ignore_header_errors,
mnemonic_case=mnemonic_case,
)
s = self.match_raw_section("~O")

drop = []
if s:
self.sections["Other"] = "\n".join(s["lines"])
drop.append(s["title"])
for key in drop:
self.raw_sections.pop(key)

# Deal with nonstandard sections that some operators and/or
# service companies (eg IHS) insist on adding.
drop = []
for s in self.raw_sections.values():
if s["section_type"] == "header":
logger.warning("Found nonstandard LAS section: " + s["title"])
self.sections[s["title"][1:]] = "\n".join(s["lines"])
drop.append(s["title"])
for key in drop:
self.raw_sections.pop(key)

if not ignore_data:
drop = []
s = self.match_raw_section("~A")
s_valid = True
if s is None:
logger.warning("No data section (regexp='~A') found")
s_valid = False
try:
if s["ncols"] is None:
logger.warning("No numerical data found inside ~A section")
s_valid = False
except:
pass

if s_valid:
arr = s["array"]
logger.debug("~A data.shape {}".format(arr.shape))
if version_NULL:
arr[arr == null] = np.nan
logger.debug(
"~A after NULL replacement data.shape {}".format(arr.shape)
)
# Provisionally, assume that the number of columns represented
# by the data section's array is equal to the number of columns
# defined in the Curves/Definition section.

n_curves = len(self.curves)
n_arr_cols = len(self.curves) # provisional pending below check
logger.debug("n_curves=%d ncols=%d" % (n_curves, s["ncols"]))
if wrap == "NO":
if s["ncols"] > n_curves:
n_arr_cols = s["ncols"]
try:
data = np.reshape(arr, (-1, n_arr_cols))
except ValueError as e:
err_msg = (
"cannot reshape ~A array of "
"size {arr_shape} into "
"{n_arr_cols} columns".format(
arr_shape=arr.shape, n_arr_cols=n_arr_cols
n_columns_in_arr = len(self.curves)

# If we are told the file is unwrapped, then we assume that each
# column detected in the data section is a real column, and we use that
# count instead of the number of columns in the Curves/Definition section.

if provisional_wrapped == "NO":
n_columns_in_arr = n_columns

#---------------------------------------------------------------------
# TODO:
# This enables tests/test_read.py::test_barebones_missing_all_sections
# to pass, but may not be the complete or final solution.
#---------------------------------------------------------------------
if len(self.curves) == 0 and n_columns > 0:
n_columns_in_arr = n_columns

logger.debug(
"Data array (size {}) assumed to have {} columns "
"({} curves defined)".format(
arr.shape, n_columns_in_arr, len(self.curves)
)
)
)
if sys.version_info.major < 3:
e.message = err_msg
raise e
else:
raise ValueError(err_msg).with_traceback(e.__traceback__)
self.set_data(data, truncate=False)
drop.append(s["title"])
for key in drop:
self.raw_sections.pop(key)

# We attempt to reshape the 1D array read in from
# the data section so that it can be assigned to curves.
try:
data = np.reshape(arr, (-1, n_columns_in_arr))
except ValueError as exception:
error_message = "Cannot reshape ~A data size {0} into {1} columns".format(
arr.shape, n_columns_in_arr
)
if sys.version_info.major < 3:
exception.message = error_message
raise exception
else:
raise ValueError(error_message).with_traceback(
exception.__traceback__
)

self.set_data(data, truncate=False)
finally:
if hasattr(file_obj, "close"):
file_obj.close()

# TODO: reimplement these warnings!!

###### logger.warning("No data section (regexp='~A') found")
###### logger.warning("No numerical data found inside ~A section")

# Understand the depth/index unit.

if "m" in str(index_unit):
index_unit = "m"
Expand Down Expand Up @@ -667,7 +677,7 @@ def set_data(self, array_like, names=None, truncate=False):
data = data[:, len(self.curves)]

# Extend curves list if necessary.
while data.shape[1] > len(self.curves):
while data.size > 0 and (data.shape[1] > len(self.curves)):
self.curves.append(CurveItem(""))

if not names:
Expand All @@ -678,9 +688,10 @@ def set_data(self, array_like, names=None, truncate=False):
names.append("")
logger.debug("set_data. names to use: {}".format(names))

for i, curve in enumerate(self.curves):
curve.mnemonic = names[i]
curve.data = data[:, i]
if data.size > 0:
for i, curve in enumerate(self.curves):
curve.mnemonic = names[i]
curve.data = data[:, i]

self.curves.assign_duplicate_suffixes()

Expand Down
Loading

0 comments on commit 3019d81

Please sign in to comment.