From 0c8e436891099f544f35928ffd41d3ebaa522c69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guilherme=20Castel=C3=A3o?=
Date: Sat, 28 Jan 2023 21:28:58 -0700
Subject: [PATCH] Initializing a fresh pyproject.toml (#71)

* Initializing a fresh pyproject.toml

Upgrading package structure.

* Updating CI reference to branch main

* Moving package's metadata to pyproject.toml

* fix: Missing a version

* Moved dependencies

* Migrated testing requirements to pyproject.toml

* Moving other optionals

* Project's URL

* Moving console script definition

* How the version is defined

* Setting up black for 88 line-length

* Using dynamic versioning

* Cleaning everything related to hardcoded version

* Keeping Py-3.6 for a little bit longer

It took me so long to update this, so I should at least give some time.
There is a chance of someone being stuck on older versions because of this.

* fix: Just use float instead

* Updating history

* Testing with Py-3.8

* Testing with 3.9 & 3.10

* fix: encoding for json.loads is deprecated

* Don't fail-fast so it is easier to debug

* Updating code syntax

* style: Applying black

* style: More formatting with black

* Updating GA Python setup

* style: More work improving code style

* Adding pre-commit setup

This is probably what will be used. Upgrade the pyupgrade target once we
stop supporting Py-3.6.
---
 .github/workflows/ci.yml        |  13 +-
 .gitignore                      |   1 +
 .pre-commit-config.yaml         |  32 ++
 HISTORY.rst                     |  17 +-
 README.rst                      |   2 +-
 VERSION                         |   1 -
 bin/ctdqc                       | 271 +++++++-------
 docs/source/commandline.rst     |   4 +-
 docs/source/conf.py             | 145 ++++----
 docs/source/getting_started.rst |  10 +-
 docs/source/index.rst           |   1 -
 docs/source/install.rst         |   2 +-
 docs/source/overview.rst        |  11 +-
 pyproject.toml                  |  62 ++++
 requirements_dev.txt            |   9 -
 seabird/__init__.py             |  22 +-
 seabird/checks.py               |   2 -
 seabird/cli.py                  |  53 ++-
 seabird/cnv.py                  | 627 ++++++++++++++++----------------
 seabird/exceptions.py           |   6 +-
 seabird/netcdf.py               |  61 ++--
 seabird/qc.py                   |  25 +-
 seabird/rules/refnames.json     |  92 ++---
 seabird/utils.py                | 138 +++----
 setup.cfg                       |  18 -
 setup.py                        |  44 ---
 tests/test_import.py            |   7 +-
 tests/test_parse.py             |  56 ++-
 tests/test_rules.py             |  15 +-
 tests/test_serialize.py         |  19 +-
 30 files changed, 913 insertions(+), 853 deletions(-)
 create mode 100644 .pre-commit-config.yaml
 delete mode 100644 VERSION
 create mode 100644 pyproject.toml
 delete mode 100644 requirements_dev.txt
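A note on the dynamic versioning wired up below: setuptools_scm derives the
package version from git tags and writes it to seabird/version.py at build
time, which is why version.py is added to .gitignore and why CI now checks
out with fetch-depth: 0 (the tags are needed to compute a version). A quick
way to sanity-check the result after applying this patch, as a sketch
assuming a clone with tags and a reasonably recent pip/setuptools:

    $ pip install -e .
    $ python -c "import seabird; print(seabird.__version__)"
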
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d858a6e..93e4569 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,9 +2,9 @@ name: pySeabird

 on:
   push:
-    branches: [ master ]
+    branches: [ main ]
   pull_request:
-    branches: [ master ]
+    branches: [ main ]
   schedule:
     - cron: "7 18 * * 0"

@@ -14,14 +14,17 @@ jobs:
     runs-on: ubuntu-latest

     strategy:
+      fail-fast: false
       matrix:
-        python-version: ["3.7"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]

     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0

     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
diff --git a/.gitignore b/.gitignore
index e74d3fd..132aa0f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@ parts
 sdist
 develop-eggs
 .installed.cfg
+version.py

 # Installer logs
 pip-log.txt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..504d38f
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+default_language_version:
+  python: python3.10
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.3.0
+  hooks:
+  - id: check-ast
+  - id: check-docstring-first
+  - id: check-merge-conflict
+  - id: check-yaml
+  - id: check-toml
+  - id: debug-statements
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-added-large-files
+- repo: https://github.com/asottile/pyupgrade
+  rev: v2.38.2
+  hooks:
+  - id: pyupgrade
+    args: [--py36-plus]
+- repo: https://github.com/psf/black
+  rev: 22.8.0
+  hooks:
+  - id: black
+    args: [ --safe ]
+    exclude: docs/conf.py
+- repo: https://github.com/PyCQA/flake8
+  rev: 5.0.4
+  hooks:
+  - id: flake8
diff --git a/HISTORY.rst b/HISTORY.rst
index 50e3bb5..634c455 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,6 +3,12 @@ History
 -------

+0.12.x
+------
+
+* Upgrading package structure. Before, it was still an early Python-3
+  structure that limited it to Python<3.8.
+
 0.11.x
 ------

@@ -100,7 +106,7 @@ History
 * When position available in the header.intro as NMEA, load this instead of
   try to parse from the header.notes.

-* Load default values from a yaml file, like attributes{cruise, project, 
+* Load default values from a yaml file, like attributes{cruise, project,
   shipname ...}

 * Using a recommended variable names list
@@ -121,3 +127,12 @@ History
 * Initial import.
 * It's able to load the variables into Masked Array, but it's not the best
   way to do it.
+
+pre-0.1
+-------
+
+This package was derived from what is now CoTeDe. It had a different name at
+that time. Gui restructured it in 2006 into a consistent Python package to
+quality control TSGs at NOAA/AOML. Operating with different models of TSGs
+and different output versions, it was crucial to parse and normalize them
+into a consistent data model for a standard QC procedure.
diff --git a/README.rst b/README.rst
index 2538a01..a7008ed 100644
--- a/README.rst
+++ b/README.rst
@@ -52,7 +52,7 @@ To install:

    pip install seabird

-One way to use is running on the shell the cnvdump. Independent of the historical version of the cnv file, it will return a default structure:
+One way to use it is to run cnvdump from the shell. 
Independent of the historical version of the cnv file, it will return a default structure: seabird cnvdump your_file.cnv diff --git a/VERSION b/VERSION deleted file mode 100644 index 797567c..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -version='0.11.5' diff --git a/bin/ctdqc b/bin/ctdqc index 5f1426c..1d26624 100755 --- a/bin/ctdqc +++ b/bin/ctdqc @@ -17,13 +17,21 @@ from seabird.exceptions import CNVError # ==== Parsing the options on command line parser = OptionParser() -parser.add_option("--clean", dest="clean", - action="store_true", default = False, - help="If selected will remove lines with only bad data.") - -parser.add_option("--no-header", dest="noheader", - action="store_true", default = False, - help="If selected will not show the header.") +parser.add_option( + "--clean", + dest="clean", + action="store_true", + default=False, + help="If selected will remove lines with only bad data.", +) + +parser.add_option( + "--no-header", + dest="noheader", + action="store_true", + default=False, + help="If selected will not show the header.", +) (options, args) = parser.parse_args() @@ -32,7 +40,9 @@ try: profile = fProfileQC(args[0], verbose=False) except CNVError as e: print("\033[91m%s\033[0m" % e.msg) - import sys; sys.exit() + import sys + + sys.exit() except: raise @@ -43,123 +53,128 @@ fnameout = args[0].replace(".cnv", "_qced.nc") import netCDF4 -nc = netCDF4.Dataset(fnameout, 'w') +nc = netCDF4.Dataset(fnameout, "w") -Nz = profile['pressure'].size +Nz = profile["pressure"].size -nc.createDimension('TIME', 0) -nc.createDimension('DEPTH', Nz) -nc.createDimension('LATITUDE', 1) -nc.createDimension('LONGITUDE', 1) +nc.createDimension("TIME", 0) +nc.createDimension("DEPTH", Nz) +nc.createDimension("LATITUDE", 1) +nc.createDimension("LONGITUDE", 1) output = {} -output['TIME'] = nc.createVariable('TIME', 'f8', 'TIME', fill_value=999999.) -output['TIME'].long_name = "time" -output['TIME'].standard_name = "time" -output['TIME'].units = "days since 1950-01-01T00:00:00Z" -output['TIME'].valid_min = 0. -output['TIME'].valid_max = 90000. -output['TIME'].axis = "T" -output['TIME'][0] = netCDF4.date2num(profile.attributes['datetime'], - "days since 1950-01-01T00:00:00Z") - -output['LATITUDE'] = nc.createVariable('LATITUDE', 'f4', 'LATITUDE', - fill_value=99999.) -output['LATITUDE'].long_name = "Latitude of each location" -output['LATITUDE'].standard_name = "latitude" -output['LATITUDE'].units = "degrees_north" -output['LATITUDE'].valid_min = -90. -output['LATITUDE'].valid_max = 90. -output['LATITUDE'].axis = "Y" - -output['LONGITUDE'] = nc.createVariable('LONGITUDE', 'f4', 'LONGITUDE', - fill_value=99999.) -output['LONGITUDE'].long_name = "Longitude of each location" -output['LONGITUDE'].standard_name = "longitude" -output['LONGITUDE'].units = "degrees_east" -output['LONGITUDE'].valid_min = -180. -output['LONGITUDE'].valid_max = 180. -output['LONGITUDE'].axis = "X" - -output['DEPTH'] = nc.createVariable('DEPTH', 'f4', 'DEPTH', - fill_value=-99999.0) -output['DEPTH'].standard_name = "depth" -output['DEPTH'].units = "meters" -output['DEPTH'].positive = "down" -output['DEPTH'].axis = "Z" -output['DEPTH'].reference = "sea_level" -output['DEPTH'].long_name = "Depth of measurement" -output['DEPTH'].valid_min = 0. -output['DEPTH'].valid_max = 12000. -#DEPTH:QC_indicator = 8 ; -#DEPTH:processing_level ; -#DEPTH:QC_procedure = 2 ; -#DEPTH:uncertainty = 1. ; -output['DEPTH'].ancillary_variables = "DEPH_QC" ; -#DEPTH:comment = "Calculated using integration of insitu density. Sverdrup, et al. 
1942" ; -output['DEPTH'].DM_indicator = "D" -output['DEPTH'][:] = profile['pressure'] - - -output['PRES'] = nc.createVariable('PRES', 'f4', 'DEPTH', - fill_value=-99999.0) -output['PRES'][:] = profile['pressure'] - - -output['TEMP'] = nc.createVariable('TEMP', 'f4', ('TIME','DEPTH'), - fill_value=99999.) -output['TEMP'].standard_name = "sea_water_temperature" -output['TEMP'].units = "degree_Celsius" -output['TEMP'].long_name = "sea water temperature in-situ ITS-90 scale" -output['TEMP'].valid_min = -2.0 -output['TEMP'].valid_max = 40. -output['TEMP'].cell_methods = "TIME: mean DEPTH: point LATITUDE: point LONGITUDE: point" -output['TEMP'].reference_scale = "ITS-90" -output['TEMP'].ancillary_variables = "TEMP_QC" -output['TEMP'][0, :] = profile['temperature'] - - -output['TEMP_QC'] = nc.createVariable('TEMP_QC', 'i1', ('TIME','DEPTH'), - fill_value=99999.) -output['TEMP_QC'].long_name = "quality flag of sea water temperature" -output['TEMP_QC'].conventions = "OceanSITES QC Flags" -output['TEMP_QC'].coordinates = "TIME DEPTH LATITUDE LONGITUDE" -output['TEMP_QC'].flag_values = [0, 1, 2, 3, 4, 7, 8, 9] -output['TEMP_QC'].flag_meanings = "unknown good_data probably_good_data potentially_correctable bad_data bad_data nominal_value interpolated_value missing_value" -output['TEMP_QC'][0, :] = combined_flag(profile.flags['temperature']) - - -output['PSAL'] = nc.createVariable('PSAL', 'f4', ('TIME','DEPTH'), - fill_value=99999.) -output['PSAL'].standard_name = "sea_water_practical_salinity" -output['PSAL'].units = "psu" -output['PSAL'].long_name = "sea water salinity" -output['PSAL'].valid_min = 0. -output['PSAL'].valid_max = 45. -output['PSAL'].cell_methods = "TIME: mean DEPTH: point LATITUDE: point LONGITUDE: point" -output['PSAL'].ancillary_variables = "PSAL_QC" -output['PSAL'][0, :] = profile['salinity'] - - -output['PSAL_QC'] = nc.createVariable('PSAL_QC', 'i1', ('TIME','DEPTH'), - fill_value=99999.) 
-output['PSAL_QC'].long_name = "quality flag of sea water salinity" -output['PSAL_QC'].conventions = "OceanSITES QC Flags" -output['PSAL_QC'].coordinates = "TIME DEPTH LATITUDE LONGITUDE" -output['PSAL_QC'].flag_values = [0, 1, 2, 3, 4, 7, 8, 9] -output['PSAL_QC'].flag_meanings = "unknown good_data probably_good_data potentially_correctable bad_data bad_data nominal_value interpolated_value missing_value" -output['PSAL_QC'][0, :] = combined_flag(profile.flags['salinity']) +output["TIME"] = nc.createVariable("TIME", "f8", "TIME", fill_value=999999.0) +output["TIME"].long_name = "time" +output["TIME"].standard_name = "time" +output["TIME"].units = "days since 1950-01-01T00:00:00Z" +output["TIME"].valid_min = 0.0 +output["TIME"].valid_max = 90000.0 +output["TIME"].axis = "T" +output["TIME"][0] = netCDF4.date2num( + profile.attributes["datetime"], "days since 1950-01-01T00:00:00Z" +) + +output["LATITUDE"] = nc.createVariable("LATITUDE", "f4", "LATITUDE", fill_value=99999.0) +output["LATITUDE"].long_name = "Latitude of each location" +output["LATITUDE"].standard_name = "latitude" +output["LATITUDE"].units = "degrees_north" +output["LATITUDE"].valid_min = -90.0 +output["LATITUDE"].valid_max = 90.0 +output["LATITUDE"].axis = "Y" + +output["LONGITUDE"] = nc.createVariable( + "LONGITUDE", "f4", "LONGITUDE", fill_value=99999.0 +) +output["LONGITUDE"].long_name = "Longitude of each location" +output["LONGITUDE"].standard_name = "longitude" +output["LONGITUDE"].units = "degrees_east" +output["LONGITUDE"].valid_min = -180.0 +output["LONGITUDE"].valid_max = 180.0 +output["LONGITUDE"].axis = "X" + +output["DEPTH"] = nc.createVariable("DEPTH", "f4", "DEPTH", fill_value=-99999.0) +output["DEPTH"].standard_name = "depth" +output["DEPTH"].units = "meters" +output["DEPTH"].positive = "down" +output["DEPTH"].axis = "Z" +output["DEPTH"].reference = "sea_level" +output["DEPTH"].long_name = "Depth of measurement" +output["DEPTH"].valid_min = 0.0 +output["DEPTH"].valid_max = 12000.0 +# DEPTH:QC_indicator = 8 ; +# DEPTH:processing_level ; +# DEPTH:QC_procedure = 2 ; +# DEPTH:uncertainty = 1. ; +output["DEPTH"].ancillary_variables = "DEPH_QC" +# DEPTH:comment = "Calculated using integration of insitu density. Sverdrup, et al. 
1942" ; +output["DEPTH"].DM_indicator = "D" +output["DEPTH"][:] = profile["pressure"] + + +output["PRES"] = nc.createVariable("PRES", "f4", "DEPTH", fill_value=-99999.0) +output["PRES"][:] = profile["pressure"] + + +output["TEMP"] = nc.createVariable("TEMP", "f4", ("TIME", "DEPTH"), fill_value=99999.0) +output["TEMP"].standard_name = "sea_water_temperature" +output["TEMP"].units = "degree_Celsius" +output["TEMP"].long_name = "sea water temperature in-situ ITS-90 scale" +output["TEMP"].valid_min = -2.0 +output["TEMP"].valid_max = 40.0 +output["TEMP"].cell_methods = "TIME: mean DEPTH: point LATITUDE: point LONGITUDE: point" +output["TEMP"].reference_scale = "ITS-90" +output["TEMP"].ancillary_variables = "TEMP_QC" +output["TEMP"][0, :] = profile["temperature"] + + +output["TEMP_QC"] = nc.createVariable( + "TEMP_QC", "i1", ("TIME", "DEPTH"), fill_value=99999.0 +) +output["TEMP_QC"].long_name = "quality flag of sea water temperature" +output["TEMP_QC"].conventions = "OceanSITES QC Flags" +output["TEMP_QC"].coordinates = "TIME DEPTH LATITUDE LONGITUDE" +output["TEMP_QC"].flag_values = [0, 1, 2, 3, 4, 7, 8, 9] +output[ + "TEMP_QC" +].flag_meanings = "unknown good_data probably_good_data potentially_correctable bad_data bad_data nominal_value interpolated_value missing_value" +output["TEMP_QC"][0, :] = combined_flag(profile.flags["temperature"]) + + +output["PSAL"] = nc.createVariable("PSAL", "f4", ("TIME", "DEPTH"), fill_value=99999.0) +output["PSAL"].standard_name = "sea_water_practical_salinity" +output["PSAL"].units = "psu" +output["PSAL"].long_name = "sea water salinity" +output["PSAL"].valid_min = 0.0 +output["PSAL"].valid_max = 45.0 +output["PSAL"].cell_methods = "TIME: mean DEPTH: point LATITUDE: point LONGITUDE: point" +output["PSAL"].ancillary_variables = "PSAL_QC" +output["PSAL"][0, :] = profile["salinity"] + + +output["PSAL_QC"] = nc.createVariable( + "PSAL_QC", "i1", ("TIME", "DEPTH"), fill_value=99999.0 +) +output["PSAL_QC"].long_name = "quality flag of sea water salinity" +output["PSAL_QC"].conventions = "OceanSITES QC Flags" +output["PSAL_QC"].coordinates = "TIME DEPTH LATITUDE LONGITUDE" +output["PSAL_QC"].flag_values = [0, 1, 2, 3, 4, 7, 8, 9] +output[ + "PSAL_QC" +].flag_meanings = "unknown good_data probably_good_data potentially_correctable bad_data bad_data nominal_value interpolated_value missing_value" +output["PSAL_QC"][0, :] = combined_flag(profile.flags["salinity"]) nc.close() -import sys; sys.exit() +import sys + +sys.exit() -varnames = ['timeS'] -output = profile['timeS'] +varnames = ["timeS"] +output = profile["timeS"] -varnames.append('pressure') -output = ma.vstack((output, profile['pressure'])) +varnames.append("pressure") +output = ma.vstack((output, profile["pressure"])) -for v in ['temperature', 'salinity']: +for v in ["temperature", "salinity"]: varnames.append(v) output = ma.vstack((output, profile[v])) f = combined_flag(profile.flags[v]) @@ -168,26 +183,28 @@ for v in ['temperature', 'salinity']: output = output.T -#import pdb; pdb.set_trace() +# import pdb; pdb.set_trace() header = "" if options.noheader is False: - for a in ['filename', 'md5', 'datetime', 'latitude', 'longitude']: - header += "%s: %s\n" % (a, profile.attributes[a]) + for a in ["filename", "md5", "datetime", "latitude", "longitude"]: + header += "{}: {}\n".format(a, profile.attributes[a]) -#header += "%s\n" % ",\t".join(varnames) +# header += "%s\n" % ",\t".join(varnames) header += ",".join(["%17s" % v for v in varnames]) -#np.savetxt('test.out', output, header=header, delimiter=', ') +# 
np.savetxt('test.out', output, header=header, delimiter=', ') if options.clean is True: - output = output[(output[:,5] == 1) & (output[:,3] ==1)] + output = output[(output[:, 5] == 1) & (output[:, 3] == 1)] -#print ",\t".join(varnames) +# print ",\t".join(varnames) print(header) for row in output: - #print '%10s,\t%10s,\t%8s,\t%i,\t%8s,\t%i' % \ - print('%17s,%17s,%17s,%17s,%17s,%17s' % - (row[0], row[1], row[2], int(row[3]), row[4], int(row[5]))) + # print '%10s,\t%10s,\t%8s,\t%i,\t%8s,\t%i' % \ + print( + "%17s,%17s,%17s,%17s,%17s,%17s" + % (row[0], row[1], row[2], int(row[3]), row[4], int(row[5])) + ) -#np.savetxt(args[1], output.T, +# np.savetxt(args[1], output.T, # fmt='%.3f,\t%.2f,\t%.3f,\t%i,\t%.3f,\t%i', # header=",\t".join(varnames)) diff --git a/docs/source/commandline.rst b/docs/source/commandline.rst index 9cfa8ba..0cb040c 100644 --- a/docs/source/commandline.rst +++ b/docs/source/commandline.rst @@ -2,14 +2,14 @@ Command line (ctdqc) ==================== -A CTD data file can be quality controled from the shell script using the command line ctdqc. +A CTD data file can be quality controled from the shell script using the command line ctdqc. On this way it's easy to run the quality control from the shell, for example in a cron script for operational procedures. In the shell one can run:: $ ctdqc MyData.cnv -A new file is created, MyData_qced.nc with depth, temperature and salinity, with the respective quality control flags. +A new file is created, MyData_qced.nc with depth, temperature and salinity, with the respective quality control flags. It's used the default cotede setup of tests. With the command line it's easy to run in a full collection of cnv files, like:: diff --git a/docs/source/conf.py b/docs/source/conf.py index d380432..2fe189d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Seabird documentation build configuration file, created by # sphinx-quickstart on Sat May 10 08:24:18 2014. @@ -18,55 +17,55 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.coverage', + "sphinx.ext.autodoc", + "sphinx.ext.coverage", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'Seabird' -copyright = u'2014, Guilherme Castlão' +project = "Seabird" +copyright = "2014, Guilherme Castlão" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. 
-version = '0.5' +version = "0.5" # The full version, including alpha/beta/rc tags. -release = '0.5.3' +release = "0.5.3" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -74,167 +73,161 @@ # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +html_theme = "default" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. 
-#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'Seabirddoc' +htmlhelp_basename = "Seabirddoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - ('index', 'Seabird.tex', u'Seabird Documentation', - u'Guilherme Castelão', 'manual'), + ("index", "Seabird.tex", "Seabird Documentation", "Guilherme Castelão", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - ('index', 'seabird', u'Seabird Documentation', - [u'Guilherme Castlão'], 1) -] +man_pages = [("index", "seabird", "Seabird Documentation", ["Guilherme Castlão"], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -243,19 +236,25 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'Seabird', u'Seabird Documentation', - u'Guilherme Castlão', 'Seabird', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "Seabird", + "Seabird Documentation", + "Guilherme Castlão", + "Seabird", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index be17e42..b0d6b06 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -1,5 +1,5 @@ **************************** -Getting Started with Seabird +Getting Started with Seabird **************************** Inside python @@ -26,13 +26,13 @@ The data from a profile is hence treated as it was a dictionary of Masked Arrays From the terminal ================= -One way to use is running on the shell the cnvdump. +One way to use is running on the shell the cnvdump. Independent of the historical version of the cnv file, it will return a default structure:: seabird cnvdump your_file.cnv -That can be used in a regular shell script. -For example, let's consider a directory cruise1 with several sub directories, one for each leg of the cruise. +That can be used in a regular shell script. +For example, let's consider a directory cruise1 with several sub directories, one for each leg of the cruise. One could list all the latitudes of each CTD cast like:: for file in `find ./cruise1 -iname '*.cnv'` @@ -98,5 +98,5 @@ More examples ============= I keep a notebooks collection of `practical examples handling CTD data `_ -. +. If you have any suggestion, please let me know. diff --git a/docs/source/index.rst b/docs/source/index.rst index 2f86782..d513f67 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -33,4 +33,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/docs/source/install.rst b/docs/source/install.rst index c5b2519..afcd5be 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -16,7 +16,7 @@ Optional requirement - `CoTeDe `_, if you want to quality control your data with custom or pre-set group of checks. -Installing Seabird +Installing Seabird ================== Virtual Environments diff --git a/docs/source/overview.rst b/docs/source/overview.rst index 8cc4b2d..a995107 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -2,16 +2,15 @@ Overview ******** -Seabird is a popular brand of sensors used for hydrographic measurements around the world, and that means a great deal of historical CTD data. 
-These hydrographic profiles are usually available as ASCII files, containing the data itself, and plenty of fundamental metadata, such as position, date, calibration coefficients, and much more. +Seabird is a popular brand of sensors used for hydrographic measurements around the world, and that means a great deal of historical CTD data. +These hydrographic profiles are usually available as ASCII files, containing the data itself, and plenty of fundamental metadata, such as position, date, calibration coefficients, and much more. Typically, these files are not hard for a human to interpret, but their format has changed over time, so it is a problem for automated processing. -While working with several years of CTD data from the project PIRATA, I realized that the first problem is just to be able to properly read all the data. +While working with several years of CTD data from the project PIRATA, I realized that the first problem is just to be able to properly read all the data. I built this Python package with the goal to parse, in a robust way, the different historical Seabird output data file formats, and return that data in a uniform structure. -At this point, my goal is to have an object with attributes parsed from the header, and the data in (NumPy) Masked Arrays, so that the user doesn't need to manually determine the version and details of a .cnv file, but will still have it in a standard pattern, ready to use. +At this point, my goal is to have an object with attributes parsed from the header, and the data in (NumPy) Masked Arrays, so that the user doesn't need to manually determine the version and details of a .cnv file, but will still have it in a standard pattern, ready to use. Taking advantage of the basic library, this package includes some binary commands to output content as ASCII, but in a persistent format, or to convert it into a NetCDF file. -ATTENTION: this is not an official Sea-Bird package, so if you have trouble with it, please do not complain to Sea-Bird. +ATTENTION: this is not an official Sea-Bird package, so if you have trouble with it, please do not complain to Sea-Bird. Instead, open an issue at GitHub (https://github.com/castelao/seabird/issues), and I'll try to help you. 
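The uniform structure promised in overview.rst above maps onto a small API.
A minimal sketch of the intended usage, assuming a local cast named
your_file.cnv (the available variable names depend on the sensors recorded
in each file)::

    from seabird import fCNV

    profile = fCNV("your_file.cnv")
    profile.attributes["datetime"]  # metadata parsed from the header
    profile.keys()                  # variables available in this file
    temp = profile["temperature"]   # each variable is a numpy masked array
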
-
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..becf56d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,62 @@
+[build-system]
+requires = [
+    "setuptools >= 48",
+    "setuptools_scm[toml] >= 6.2",
+    "setuptools_scm_git_archive",
+    "wheel >= 0.29.0",
+]
+build-backend = 'setuptools.build_meta'
+
+[project]
+name="seabird"
+dynamic = ["version"]
+description="Parser for Sea-Bird's CTD and TSG"
+readme = "README.rst"
+requires-python = ">=3.6"
+keywords = ["oceanography", "ocean data", "CTD", "TSG", "SeaBird", "hydrography", "parser"]
+classifiers=[
+    "Development Status :: 5 - Production/Stable",
+    "Natural Language :: English",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.6",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Topic :: Scientific/Engineering"
+]
+dependencies = [
+    "numpy>=1.14",
+    "Click>=6.6",
+]
+
+[project.optional-dependencies]
+test = [
+    "hypothesis >= 6.29.3",
+    "pytest >= 5.0.0",
+    "pytest-cov[all]",
+    "pip >= 9.0.1",
+    "flake8 >= 3.2.1",
+    "tox >= 2.3.3",
+    "coverage >= 4.2",
+    "supportdata >= 0.1.2",
+    "twine >= 1.8.1",
+]
+CDF = ["netCDF4>=1.4"]
+QC = ["cotede>=0.20.2"]
+
+[project.urls]
+repository = "https://github.com/castelao/seabird"
+
+[project.scripts]
+"seabird" = "seabird.cli:cli"
+
+[tool.black]
+line-length = 88
+
+[tool.setuptools_scm]
+write_to = "seabird/version.py"
+git_describe_command = "git describe --dirty --tags --long --match 'v*' --first-parent"
diff --git a/requirements_dev.txt b/requirements_dev.txt
deleted file mode 100644
index c60a304..0000000
--- a/requirements_dev.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-pip>=9.0.1
-bumpversion>=0.5.3
-wheel>=0.29.0
-flake8>=3.2.1
-tox>=2.3.3
-coverage>=4.2
-pytest>=3.0.5
-supportdata>=0.1.2
-twine>=1.8.1
diff --git a/seabird/__init__.py b/seabird/__init__.py
index 1508548..4172e67 100644
--- a/seabird/__init__.py
+++ b/seabird/__init__.py
@@ -1,10 +1,22 @@
-# -*- coding: utf-8 -*-
-
-__author__ = 'Guilherme Castelao'
-__email__ = 'guilherme@castelao.net'
-__version__ = '0.11.5'
+__author__ = "Guilherme Castelao"
+__email__ = "guilherme@castelao.net"

 from .cnv import CNV, fCNV
 from .exceptions import CNVError

 # __all__ = ['CNV', 'fCNV']
+
+
+from pkg_resources import get_distribution, DistributionNotFound
+
+try:
+    __version__ = get_distribution(__name__).version
+except DistributionNotFound:
+    try:
+        from .version import version as __version__
+    except ImportError:
+        raise ImportError(
+            "Failed to find (autogenerated) version.py. "
+            "This might be because you are installing from GitHub's tarballs; "
+            "use the PyPI ones."
+ ) diff --git a/seabird/checks.py b/seabird/checks.py index c69a713..5c140ce 100644 --- a/seabird/checks.py +++ b/seabird/checks.py @@ -1,6 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ Do a sequence of checks on the parsed CNV data """ - diff --git a/seabird/cli.py b/seabird/cli.py index 0c27911..112aca6 100644 --- a/seabird/cli.py +++ b/seabird/cli.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ Command line utilities for package Seabird """ @@ -10,21 +9,22 @@ from .cnv import fCNV from .netcdf import cnv2nc + @click.group() def cli(): - """ Utilities for seabird files - """ + """Utilities for seabird files""" pass -@cli.command(name='cnvdump') -@click.argument('inputfilename', type=click.Path(exists=True)) + +@cli.command(name="cnvdump") +@click.argument("inputfilename", type=click.Path(exists=True)) def dump(inputfilename): """Dump the .cnv content as text - Doesn't matter the version of the .cnv, this command will - show it's content in a unified pattern, as an ASCII text. + Doesn't matter the version of the .cnv, this command will + show it's content in a unified pattern, as an ASCII text. - Consider the idea of a descriptor file with default values. + Consider the idea of a descriptor file with default values. """ try: @@ -38,41 +38,38 @@ def dump(inputfilename): print("file: %s" % inputfilename) print("Global attributes") for a in sorted(data.attrs.keys()): - print("\t\033[93m%s\033[0m: %s" % (a, data.attrs[a])) + print("\t\033[93m{}\033[0m: {}".format(a, data.attrs[a])) print("\nVariabes") for k in data.keys(): print("\033[91m%s\033[0m" % k) for a in data[k].attrs.keys(): - print("\t\033[93m%s\033[0m: %s" % (a, data[k].attrs[a])) + print("\t\033[93m{}\033[0m: {}".format(a, data[k].attrs[a])) + -@cli.command(name='cnv2nc') -@click.option('--outputfilename', default=None, - help='The output netCDF filename.') -@click.argument('inputfilename', type=click.Path(exists=True)) +@cli.command(name="cnv2nc") +@click.option("--outputfilename", default=None, help="The output netCDF filename.") +@click.argument("inputfilename", type=click.Path(exists=True)) def nc(inputfilename, outputfilename): - """ Export a CNV file as a netCDF - """ + """Export a CNV file as a netCDF""" if outputfilename is None: - outputfilename = inputfilename.replace('.cnv','.nc') - click.echo('Saving on %s' % outputfilename) + outputfilename = inputfilename.replace(".cnv", ".nc") + click.echo("Saving on %s" % outputfilename) data = fCNV(inputfilename) cnv2nc(data, outputfilename) -@cli.command(name='ctdqc') -@click.option('--outputfilename', default=None, - help='The output netCDF filename.') -@click.option('--config', default=None, - help='The output netCDF filename.') -@click.argument('inputfilename', type=click.Path(exists=True)) +@cli.command(name="ctdqc") +@click.option("--outputfilename", default=None, help="The output netCDF filename.") +@click.option("--config", default=None, help="The output netCDF filename.") +@click.argument("inputfilename", type=click.Path(exists=True)) def qc(inputfilename, outputfilename, config): - """ - """ + """ """ from cotede.qc import ProfileQC, combined_flag + if outputfilename is None: - outputfilename = inputfilename.replace('.cnv', '.nc') - click.echo('Saving on %s' % outputfilename) + outputfilename = inputfilename.replace(".cnv", ".nc") + click.echo("Saving on %s" % outputfilename) data = fCNV(inputfilename) profile = ProfileQC(data, cfg=config, verbose=False) print(profile.flags) diff --git a/seabird/cnv.py b/seabird/cnv.py index 
cfea424..a7c3244 100644 --- a/seabird/cnv.py +++ b/seabird/cnv.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - from datetime import datetime, timedelta import re import pkg_resources @@ -10,10 +8,12 @@ try: import hashlib + md5 = hashlib.md5 except ImportError: # for Python << 2.5 import md5 + md5 = md5.new # import codecs @@ -23,42 +23,43 @@ from seabird.exceptions import CNVError from seabird.utils import load_rule -logging.basicConfig(level='INFO', format='%(message)s') -module_logger = logging.getLogger('seabird.cnv') +logging.basicConfig(level="INFO", format="%(message)s") +module_logger = logging.getLogger("seabird.cnv") -class CNV(object): - """ Main class to parse the .cnv style content +class CNV: + """Main class to parse the .cnv style content - Input: - raw_text [String]: The full content of the .cnv file. + Input: + raw_text [String]: The full content of the .cnv file. - Output: - This class responds as it was a dictionary of variables, - and each hash has a Masked Array. + Output: + This class responds as it was a dictionary of variables, + and each hash has a Masked Array. - Ex.: - f = open("CTD.cnv") - text = f.read() - profile = CNV(text) - profile.keys() # Return the available variables - profile['temperature'] # Return the temperature sensor as a - masked array - profile['timeS'] # Return the time in Seconds - profile.attrs # Return a dictionary with the file header + Ex.: + f = open("CTD.cnv") + text = f.read() + profile = CNV(text) + profile.keys() # Return the available variables + profile['temperature'] # Return the temperature sensor as a + masked array + profile['timeS'] # Return the time in Seconds + profile.attrs # Return a dictionary with the file header """ + def __init__(self, raw_text, defaults=None): - module_logger.debug('Initializing CNV class') + module_logger.debug("Initializing CNV class") # Clean empty lines first - self.raw_text = re.sub('\n\s*(?=\n)', '', raw_text) + self.raw_text = re.sub("\n\s*(?=\n)", "", raw_text) self.defaults = defaults self.attrs = {} # ---- self.rule, self.parsed = load_rule(self.raw_text) - if not hasattr(self, 'parsed'): + if not hasattr(self, "parsed"): return self.get_intro() self.get_attrs() @@ -71,14 +72,14 @@ def __init__(self, raw_text, defaults=None): # real attributes to respond right. # It definitely should not be here, but inside some function. 
try: - for k in defaults['attrs']: - self.attrs[k] = defaults['attrs'][k] + for k in defaults["attrs"]: + self.attrs[k] = defaults["attrs"][k] except: pass - if 'bindata' in self.raw_data().keys(): + if "bindata" in self.raw_data().keys(): self.load_bindata() - elif 'bottledata' in self.raw_data().keys(): + elif "bottledata" in self.raw_data().keys(): self.load_bottledata() else: self.load_data() @@ -88,179 +89,167 @@ def __init__(self, raw_text, defaults=None): self.check_consistency() def keys(self): - """ Return the available keys in self.data - """ - return [d.attrs['name'] for d in self.data] + """Return the available keys in self.data""" + return [d.attrs["name"] for d in self.data] def __getitem__(self, key): - """ Return the key array from self.data - """ + """Return the key array from self.data""" for d in self.data: - if d.attrs['name'] == key: + if d.attrs["name"] == key: return d - logging.error('%s not found' % key) + logging.error("%s not found" % key) @property def attributes(self): return self.attrs def raw_header(self): - r = self.rule['header'] + self.rule['sep'] + r = self.rule["header"] + self.rule["sep"] content_re = re.compile(r, re.VERBOSE) return content_re.search(self.raw_text).groupdict() def raw_data(self): - if ('instrument_type' in self.attrs) and \ - self.attrs['instrument_type'] == 'CTD-bottle': - return {'bottledata': self.parsed['data']} + if ("instrument_type" in self.attrs) and self.attrs[ + "instrument_type" + ] == "CTD-bottle": + return {"bottledata": self.parsed["data"]} - r = self.rule['sep'] + self.rule['data'] + r = self.rule["sep"] + self.rule["data"] content_re = re.compile(r, re.VERBOSE) return content_re.search(self.raw_text).groupdict() def get_intro(self): - """ Parse the intro part of the header - """ - for k in self.rule['intro'].keys(): - pattern = re.compile(self.rule['intro'][k], re.VERBOSE) - if pattern.search(self.parsed['intro']): - self.attrs[k] = pattern.search( - self.parsed['intro'] - ).groupdict()['value'] - self.parsed['intro'] = pattern.sub( - '', self.parsed['intro'], count=1) + """Parse the intro part of the header""" + for k in self.rule["intro"].keys(): + pattern = re.compile(self.rule["intro"][k], re.VERBOSE) + if pattern.search(self.parsed["intro"]): + self.attrs[k] = pattern.search(self.parsed["intro"]).groupdict()[ + "value" + ] + self.parsed["intro"] = pattern.sub("", self.parsed["intro"], count=1) try: - self.attrs['instrument_type'] = \ - self.rule['attributes']['instrument_type'] + self.attrs["instrument_type"] = self.rule["attributes"]["instrument_type"] except: - if 'sbe_model' in self.attrs: - if self.attrs['sbe_model'] in ['9', '17', '19plus', - '19plus V2']: - self.attrs['instrument_type'] = 'CTD' - elif self.attrs['sbe_model'] in ['21', '45']: - self.attrs['instrument_type'] = 'TSG' - + if "sbe_model" in self.attrs: + if self.attrs["sbe_model"] in ["9", "17", "19plus", "19plus V2"]: + self.attrs["instrument_type"] = "CTD" + elif self.attrs["sbe_model"] in ["21", "45"]: + self.attrs["instrument_type"] = "TSG" def get_attrs(self): - """ - """ - for k in self.rule['descriptors'].keys(): - pattern = re.compile(self.rule['descriptors'][k], re.VERBOSE) - if pattern.search(self.parsed['descriptors']): - self.attrs[k] = pattern.search( - self.parsed['descriptors'] - ).groupdict()['value'] - self.parsed['descriptors'] = \ - pattern.sub('', self.parsed['descriptors'], count=1) + """ """ + for k in self.rule["descriptors"].keys(): + pattern = re.compile(self.rule["descriptors"][k], re.VERBOSE) + if 
pattern.search(self.parsed["descriptors"]): + self.attrs[k] = pattern.search(self.parsed["descriptors"]).groupdict()[ + "value" + ] + self.parsed["descriptors"] = pattern.sub( + "", self.parsed["descriptors"], count=1 + ) # ---- # Temporary solution. Failsafe MD5 try: - self.attrs['md5'] = md5( - self.raw_text.encode('utf-8') - ).hexdigest() + self.attrs["md5"] = md5(self.raw_text.encode("utf-8")).hexdigest() except: - self.attrs['md5'] = md5( - self.raw_text.decode( - 'latin1', 'replace' - ).encode('utf-8') - ).hexdigest() + self.attrs["md5"] = md5( + self.raw_text.decode("latin1", "replace").encode("utf-8") + ).hexdigest() def prepare_data(self): - """ - """ - attrib_text = self.parsed['descriptors'] + """ """ + attrib_text = self.parsed["descriptors"] self.data = [] self.ids = [] # ---- rule_file = "rules/refnames.json" text = pkg_resources.resource_string(__name__, rule_file) - refnames = json.loads(text.decode('utf-8')) + refnames = json.loads(text.decode("utf-8")) # ---- Parse fields - if ('attributes' in self.rule) and \ - (self.rule['attributes']['instrument_type'] == 'CTD-bottle'): - rule = r""" + if ("attributes" in self.rule) and ( + self.rule["attributes"]["instrument_type"] == "CTD-bottle" + ): + rule = r""" \s+ Bottle \s+ Date .* \n \s+ Position \s+ Time .* \n """ - attrib_text = re.search(r"""\n \s+ Bottle \s+ Date(.*)\s*\r?\n \s+ Position \s+ Time""", self.parsed['header'], re.VERBOSE).group(1) - pattern = re.compile(r"""(?P.{11})""", re.VERBOSE) - - self.ids = [0, 1, 2] - self.data = [ma.array([]), ma.array([]), ma.array([])] - self.data[0].attrs = { - 'id': 0, - 'name': 'bottle'} - self.data[1].attrs = { - 'id': 1, - 'name': 'date'} - self.data[2].attrs = { - 'id': 2, - 'name': 'time'} - - for x in pattern.finditer(str(attrib_text)): - self.ids.append(len(self.ids)) - self.data.append(ma.array([])) - try: - reference = refnames[x.groupdict()['varname'].lstrip()] - varname = reference['name'] - #longname = reference['longname'] - except: - varname = x.groupdict()['varname'].lstrip() - self.data[-1].attrs = { - 'id': self.ids[-1], - 'name': varname, - #'longname': x.groupdict()['longname'], - } - return - - pattern = re.compile(self.rule['fieldname'], re.VERBOSE) + attrib_text = re.search( + r"""\n \s+ Bottle \s+ Date(.*)\s*\r?\n \s+ Position \s+ Time""", + self.parsed["header"], + re.VERBOSE, + ).group(1) + pattern = re.compile(r"""(?P.{11})""", re.VERBOSE) + + self.ids = [0, 1, 2] + self.data = [ma.array([]), ma.array([]), ma.array([])] + self.data[0].attrs = {"id": 0, "name": "bottle"} + self.data[1].attrs = {"id": 1, "name": "date"} + self.data[2].attrs = {"id": 2, "name": "time"} + + for x in pattern.finditer(str(attrib_text)): + self.ids.append(len(self.ids)) + self.data.append(ma.array([])) + try: + reference = refnames[x.groupdict()["varname"].lstrip()] + varname = reference["name"] + # longname = reference['longname'] + except: + varname = x.groupdict()["varname"].lstrip() + self.data[-1].attrs = { + "id": self.ids[-1], + "name": varname, + #'longname': x.groupdict()['longname'], + } + return + + pattern = re.compile(self.rule["fieldname"], re.VERBOSE) for x in pattern.finditer(str(attrib_text)): - self.ids.append(int(x.groupdict()['id'])) + self.ids.append(int(x.groupdict()["id"])) try: - reference = refnames[x.groupdict()['name']] - name = reference['name'] + reference = refnames[x.groupdict()["name"]] + name = reference["name"] except: - name = x.groupdict()['name'] + name = x.groupdict()["name"] self.data.append(ma.array([])) self.data[-1].attrs = { - 'id': 
(x.groupdict()['id']),
-                'name': name,
-                'longname': x.groupdict()['longname'],
-                }
-            attrib_text = pattern.sub('', attrib_text)
+                "id": (x.groupdict()["id"]),
+                "name": name,
+                "longname": x.groupdict()["longname"],
+            }
+            attrib_text = pattern.sub("", attrib_text)

         # ---- Load span limits on each list item
-        pattern = re.compile(self.rule['fieldspan'], re.VERBOSE)
+        pattern = re.compile(self.rule["fieldspan"], re.VERBOSE)
         for x in pattern.finditer(str(attrib_text)):
-            i = self.ids.index(int(x.groupdict()['id']))
-            self.data[i].attrs['span'] = [
-                x.groupdict()['valuemin'].strip(),
-                x.groupdict()['valuemax'].strip()]
-            attrib_text = pattern.sub('', attrib_text)
+            i = self.ids.index(int(x.groupdict()["id"]))
+            self.data[i].attrs["span"] = [
+                x.groupdict()["valuemin"].strip(),
+                x.groupdict()["valuemax"].strip(),
+            ]
+            attrib_text = pattern.sub("", attrib_text)

     def load_data(self):
         """
-        Sure there is a better way to do it. 

-        Think about, should I do things using nvalues as expected
-        number of rows? Maybe do it free, and on checks, validate it.
-        In the case of an incomplete file, I think I should load it
-        anyways, and the check alerts me that it is missing data.
+        Sure there is a better way to do it.

-        There is a problem here. This atol is just a temporary solution,
-        but it's not the proper way to handle it.
+        Think about, should I do things using nvalues as expected
+        number of rows? Maybe do it free, and on checks, validate it.
+        In the case of an incomplete file, I think I should load it
+        anyways, and the check alerts me that it is missing data.
+
+        There is a problem here. This atol is just a temporary solution,
+        but it's not the proper way to handle it.
         """
         data_rows = re.sub(
-            '(\n\s*)+\n', '\n',
-            re.sub('\r\n', '\n', self.raw_data()['data'])
-        ).split('\n')[:-1]
+            "(\n\s*)+\n", "\n", re.sub("\r\n", "\n", self.raw_data()["data"])
+        ).split("\n")[:-1]
         data = ma.masked_values(
-            np.array(
-                [CNV.__split_row(d) for d in data_rows], dtype=np.float),
-            float(self.attrs['bad_flag']),
-            atol=1e-30)
+            np.array([CNV.__split_row(d) for d in data_rows], dtype=float),
+            float(self.attrs["bad_flag"]),
+            atol=1e-30,
+        )
         # Maybe use np.fromstring(data, sep=" ")
         for i in self.ids:
             attrs = self.data[i].attrs
@@ -280,252 +269,249 @@ def __split_row(row):
         """
         n = 11  # number of chars per row
-        return [row[start:start+n].strip() for start in range(0, len(row), n)]
+        return [row[start : start + n].strip() for start in range(0, len(row), n)]

     def load_bindata(self):
-        content = self.raw_data()['bindata']
+        content = self.raw_data()["bindata"]
         nvars = len(self.ids)
-        fmt = nvars*'f'
+        fmt = nvars * "f"
         linesize = struct.calcsize(fmt)
         output = []
         # FIXME: This does not allow to read the most it can from a corrupted
         #   file, i.e. incomplete file.
-        for n in range(len(content)/linesize):
-            output.append(struct.unpack_from(fmt, content, n*linesize))
-        data = ma.masked_values(
-            output,
-            float(self.attrs['bad_flag']),
-            atol=1e-30)
+        # Floor division: len(content) / linesize is a float on Python 3,
+        # which would make range() raise a TypeError.
+        for n in range(len(content) // linesize):
+            output.append(struct.unpack_from(fmt, content, n * linesize))
+        data = ma.masked_values(output, float(self.attrs["bad_flag"]), atol=1e-30)
         for i in self.ids:
             attrs = self.data[i].attrs
             self.data[i] = data[:, i]
             self.data[i].attrs = attrs

     def load_bottledata(self):
-        content = self.raw_data()['bottledata']
+        content = self.raw_data()["bottledata"]
         nvars = len(self.ids)
         data_std = {}
+
         def _convert(x):
-            if '.' in x:
+            if "." 
in x: return float(x) else: return int(x) - - for rec in re.finditer(self.rule['data'], content, re.VERBOSE): + + for rec in re.finditer(self.rule["data"], content, re.VERBOSE): attrs = self.data[0].attrs - self.data[0] = np.append(self.data[0], - int(rec.groupdict()['bottle'])) + self.data[0] = np.append(self.data[0], int(rec.groupdict()["bottle"])) self.data[0].attrs = attrs - d = datetime.strptime(rec.groupdict()['date'].strip(), '%b %d %Y') + d = datetime.strptime(rec.groupdict()["date"].strip(), "%b %d %Y") attrs = self.data[1].attrs self.data[1] = np.append(self.data[1], d.date()) self.data[1].attrs = attrs - d = datetime.strptime(rec.groupdict()['time'].strip(), '%H:%M:%S') + d = datetime.strptime(rec.groupdict()["time"].strip(), "%H:%M:%S") attrs = self.data[2].attrs self.data[2] = np.append(self.data[2], d.time()) self.data[2].attrs = attrs - for n, v in enumerate(re.findall('[-|+|\w|\.]+', - rec.groupdict()['values']), - start=3): + for n, v in enumerate( + re.findall(r"[-|+|\w|\.]+", rec.groupdict()["values"]), start=3 + ): v = _convert(v) attrs = self.data[n].attrs - self.data[n] = np.append(self.data[n],v) + self.data[n] = np.append(self.data[n], v) self.data[n].attrs = attrs - #Add std values - for n, v in enumerate(re.findall('[-|+|\w|\.]+', - rec.groupdict()['values_std']), - start=0): + # Add std values + for n, v in enumerate( + re.findall(r"[-|+|\w|\.]+", rec.groupdict()["values_std"]), start=0 + ): v = _convert(v) if n in data_std: data_std[n] = np.append(data_std[n], v) else: data_std[n] = np.array(v) - + # Append std to self.data nvars_std = len(data_std.keys()) - for std_id,values in data_std.items(): + for std_id, values in data_std.items(): id = len(self.ids) self.ids.append(id) self.data.append(ma.array(values)) attrs = self.data[nvars - nvars_std + std_id].attrs.copy() # Ignore fields that are stats specific - ignore_attributes = ['sdn_parameter_urn','sdn_parameter_name'] - attrs = {key:value for key,value in attrs.items() if key not in ignore_attributes} + ignore_attributes = ["sdn_parameter_urn", "sdn_parameter_name"] + attrs = { + key: value + for key, value in attrs.items() + if key not in ignore_attributes + } if "long_name" in attrs: - attrs['long_name'] += " Standard Deviation" + attrs["long_name"] += " Standard Deviation" attrs["cell_method"] = "scan: standard_deviation" - attrs["name"] += '_sdev' + attrs["name"] += "_sdev" # Scan count per bottle if available - if 'scan_per_bottle' in self.attrs: - attrs["cell_method"] += " (previous " + self.attrs["scan_per_bottle"] + " scans)" + if "scan_per_bottle" in self.attrs: + attrs["cell_method"] += ( + " (previous " + self.attrs["scan_per_bottle"] + " scans)" + ) # Add attributes self.data[id].attrs = attrs - - def products(self): """ - To think about, should I really estimate the products, - or should they be estimated on the fly, on demand? - - To Think About!! : - I'm not sure what would be the best way to handle, - timeQ. I actually couldn't find a definition of what - is that. PyCurrents (Eric) considers the seconds from - 2010-1-1. It's probably a good solution. - For now, I'll use the just the incremental time. At - some point I defined the datetime before, so what - matters now is the increment. - If I have the timeQ, I must have a NMEA (Time), and - Wait a minute, the NMEA Time is probably when the - header is opened, not necessarily when the rossette was - switched on. I'll just follow Eric for now. 
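A hedged worked example of the timeJ-to-timeS conversion this method
implements, assuming timeJ is the fractional, 1-based day-of-year::

    start_time = "Aug 28 2008 12:33:46"  ->  j0 = 241, t0 = 45226 s
    timeJ = 241.75                       ->  timeS = 0.75 * 86400 - 45226
                                                   = 19574 s since start_time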
+        To think about, should I really estimate the products,
+        or should they be estimated on the fly, on demand?
+
+        To Think About!! :
+            I'm not sure what would be the best way to handle
+            timeQ. I actually couldn't find a definition of what
+            that is. PyCurrents (Eric) considers the seconds from
+            2010-1-1. It's probably a good solution.
+            For now, I'll just use the incremental time. At
+            some point I defined the datetime before, so what
+            matters now is the increment.
+            If I have the timeQ, I must have a NMEA (Time). But
+            wait a minute, the NMEA Time is probably when the
+            header is opened, not necessarily when the rosette was
+            switched on. I'll just follow Eric for now.
         """
-        if ('timeS' not in self.keys()):
-            if ('timeJ' in self.keys()):
-                j0 = int(self.attrs['datetime'].date().strftime('%j'))
-                t0 = self.attrs['datetime'].time()
-                t0 = (t0.hour*60+t0.minute)*60+t0.second
+        if "timeS" not in self.keys():
+            if "timeJ" in self.keys():
+                j0 = int(self.attrs["datetime"].date().strftime("%j"))
+                t0 = self.attrs["datetime"].time()
+                t0 = (t0.hour * 60 + t0.minute) * 60 + t0.second
                 # I need to subtract one day, but I'm not so sure why I should.
                 # dref = datetime(self.attrs['datetime'].year,1,1) \
                 #     - timedelta(days=1) \
                 #     - self.attrs['datetime']
                 # dJ0 = datetime(dref.year,1,1)
-                timeS = ma.masked_all(
-                    self['timeJ'].shape, self['timeJ'].dtype)
-                timeS.set_fill_value(float(self.attrs['bad_flag']))
-                ind = np.nonzero(~ma.getmaskarray(self['timeJ']))[0]
+                timeS = ma.masked_all(self["timeJ"].shape, self["timeJ"].dtype)
+                timeS.set_fill_value(float(self.attrs["bad_flag"]))
+                ind = np.nonzero(~ma.getmaskarray(self["timeJ"]))[0]
                 try:
-                    timeS[ind] = ma.array([
-                        timedelta(days=t).total_seconds() - t0
-                        for t in self['timeJ'][ind]-j0])
+                    timeS[ind] = ma.array(
+                        [
+                            timedelta(days=t).total_seconds() - t0
+                            for t in self["timeJ"][ind] - j0
+                        ]
+                    )
                     # ma.array( [(dref + timedelta(float(d))).total_seconds()
                     #     for d in self['timeJ'][ind]])
                 except:
-                    D = [timedelta(days=t) for t in self['timeJ'][ind]-j0]
+                    D = [timedelta(days=t) for t in self["timeJ"][ind] - j0]
                     # D = [(dref + timedelta(float(d)))
                     #     for d in self['timeJ'][ind]]
-                    timeS[ind] = ma.array([
-                        d.days * 86400 + d.seconds - t0 for d in D])
-            elif ('timeQ' in self.keys()):
+                    timeS[ind] = ma.array([d.days * 86400 + d.seconds - t0 for d in D])
+            elif "timeQ" in self.keys():
                 # yref = self.attrs['datetime'].year - \
                 #     int(self['timeQ'].min()/86400./365.25
                 # dref = datetime(yref,1,1)
                 # timeS[ind] = self['timeQ'][ind] - self['timeQ'].min()
-                timeS = ma.masked_all(
-                    self['timeQ'].shape, self['timeQ'].dtype)
-                timeS.set_fill_value(float(self.attrs['bad_flag']))
-                ind = np.nonzero(~ma.getmaskarray(self['timeQ']))[0]
+                timeS = ma.masked_all(self["timeQ"].shape, self["timeQ"].dtype)
+                timeS.set_fill_value(float(self.attrs["bad_flag"]))
+                ind = np.nonzero(~ma.getmaskarray(self["timeQ"]))[0]
                 try:
-                    dref = (self.attrs['datetime'] -
-                            datetime(2000, 1, 1)).total_seconds()
+                    dref = (
+                        self.attrs["datetime"] - datetime(2000, 1, 1)
+                    ).total_seconds()
                 except:
-                    dref = (self.attrs['datetime'] -
-                            datetime(2000, 1, 1))
-                    dref = dref.days*24*60*60+dref.seconds
-                timeS = self['timeQ'] - dref
+                    dref = self.attrs["datetime"] - datetime(2000, 1, 1)
+                    dref = dref.days * 24 * 60 * 60 + dref.seconds
+                timeS = self["timeQ"] - dref
             else:
                 return
             self.data.append(timeS)
-            self.data[-1].attrs = {'name': 'timeS'}
+            self.data[-1].attrs = {"name": "timeS"}
             self.ids.append(len(self.data))

     def get_datetime(self):
-        """ Extract the reference date and time
+        """Extract the reference date and time

-            !!! ATENTION, better move it to a rule in the rules.
+        !!! ATTENTION, better move it to a rule in the rules.
         """
         # datetime.strptime('Aug 28 2008 12:33:46','%b %d %Y %H:%M:%S')
         # Needed to include an :21, because some cases have a [bla bla]
         # after.
         # It's probably not the best solution.
-        self.attrs['datetime'] = datetime.strptime(
-            self.attrs['start_time'][:20], '%b %d %Y %H:%M:%S')
+        self.attrs["datetime"] = datetime.strptime(
+            self.attrs["start_time"][:20], "%b %d %Y %H:%M:%S"
+        )
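The slice to 20 characters is what keeps strptime happy when the header line carries trailing notes. A minimal sketch of the same parsing, reusing the start_time string from the test fixtures further below (the bracketed suffix is a made-up example of the trailing junk the comment mentions):

    from datetime import datetime

    start_time = "Aug 01 2011 11:34:32 [instrument notes]"
    d = datetime.strptime(start_time[:20], "%b %d %Y %H:%M:%S")
    # d == datetime(2011, 8, 1, 11, 34, 32)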
     def get_location(self):
-        """ Extract the station location (Lat, Lon)
+        """Extract the station location (Lat, Lon)

-            Sometimes the CTD unit station is not connected to the GPS, so it's
-            written manually in the headerblob. In that case, I'll try to
-            extract it
+        Sometimes the CTD unit station is not connected to the GPS, so it's
+        written manually in the header blob. In that case, I'll try to
+        extract it from there.

-            !! ATENTION!!! Might be a good idea to store lat,lon as floats
-            with min. and sec. as fractions.
+        !! ATTENTION!!! Might be a good idea to store lat,lon as floats
+        with min. and sec. as fractions.

-            On some old format files, the notes where stored with single
-            * instead of **. One possible solution is if can't load from
-            notes, try to load from intro.
+        On some old format files, the notes were stored with a single
+        * instead of **. One possible solution: if it can't be loaded
+        from the notes, try to load it from the intro.

-            In the rules, it is set to use only . as separator for the
-            decimals of the minutes. Might be a good idea to allow \.|\,
-            but on that case I would need to substitute , by . for proper
-            load as a float.
+        In the rules, it is set to use only . as separator for the
+        decimals of the minutes. Might be a good idea to allow \.|\,
+        but in that case I would need to substitute , by . to properly
+        load it as a float.
         """
-        if ('LATITUDE' in self.attrs) and \
-                (re.search(self.rule['LATITUDE'],
-                           self.attrs['LATITUDE'],
-                           re.VERBOSE)):
-            lat = re.search(self.rule['LATITUDE'],
-                            self.attrs['LATITUDE'],
-                            re.VERBOSE).groupdict()
-        elif ('notes' in self.raw_header().keys()) and \
-                re.search(self.rule['LATITUDE'],
-                          self.raw_header()['notes'],
-                          re.VERBOSE):
-            lat = re.search(self.rule['LATITUDE'],
-                            self.raw_header()['notes'],
-                            re.VERBOSE).groupdict()
+        if ("LATITUDE" in self.attrs) and (
+            re.search(self.rule["LATITUDE"], self.attrs["LATITUDE"], re.VERBOSE)
+        ):
+            lat = re.search(
+                self.rule["LATITUDE"], self.attrs["LATITUDE"], re.VERBOSE
+            ).groupdict()
+        elif ("notes" in self.raw_header().keys()) and re.search(
+            self.rule["LATITUDE"], self.raw_header()["notes"], re.VERBOSE
+        ):
+            lat = re.search(
+                self.rule["LATITUDE"], self.raw_header()["notes"], re.VERBOSE
+            ).groupdict()
         try:
-            lat_deg = int(lat['degree'])
-            lat_min = float(lat['minute'])
-            # self.attrs['lat_deg'] = lat_deg
-            # self.attrs['lat_min'] = lat_min
-            self.attrs['LATITUDE'] = lat_deg + lat_min/60.
-            if lat['hemisphere'] in ['S', 's']:
-                self.attrs['LATITUDE'] = -self.attrs['LATITUDE']
+            lat_deg = int(lat["degree"])
+            lat_min = float(lat["minute"])
+            # self.attrs['lat_deg'] = lat_deg
+            # self.attrs['lat_min'] = lat_min
+            self.attrs["LATITUDE"] = lat_deg + lat_min / 60.0
+            if lat["hemisphere"] in ["S", "s"]:
+                self.attrs["LATITUDE"] = -self.attrs["LATITUDE"]
         except:
             pass
             # self.attrs['LATITUDE'] = None

-        if ('LONGITUDE' in self.attrs) and \
-                (re.search(self.rule['LONGITUDE'],
-                           self.attrs['LONGITUDE'],
-                           re.VERBOSE)):
-            lon = re.search(self.rule['LONGITUDE'],
-                            self.attrs['LONGITUDE'],
-                            re.VERBOSE).groupdict()
-        elif ('notes' in self.raw_header().keys()) and \
-                (re.search(self.rule['LONGITUDE'],
-                           self.raw_header()['notes'],
-                           re.VERBOSE)):
-            lon = re.search(self.rule['LONGITUDE'],
-                            self.raw_header()['notes'],
-                            re.VERBOSE).groupdict()
+        if ("LONGITUDE" in self.attrs) and (
+            re.search(self.rule["LONGITUDE"], self.attrs["LONGITUDE"], re.VERBOSE)
+        ):
+            lon = re.search(
+                self.rule["LONGITUDE"], self.attrs["LONGITUDE"], re.VERBOSE
+            ).groupdict()
+        elif ("notes" in self.raw_header().keys()) and (
+            re.search(self.rule["LONGITUDE"], self.raw_header()["notes"], re.VERBOSE)
+        ):
+            lon = re.search(
+                self.rule["LONGITUDE"], self.raw_header()["notes"], re.VERBOSE
+            ).groupdict()
         try:
-            lon_deg = int(lon['degree'])
-            lon_min = float(lon['minute'])
-            # self.attrs['lon_deg'] = lon_deg
-            # self.attrs['lon_min'] = lon_min
-            self.attrs['LONGITUDE'] = lon_deg + lon_min/60.
-            if lon['hemisphere'] in ['W', 'w']:
-                self.attrs['LONGITUDE'] = \
-                    -self.attrs['LONGITUDE']
+            lon_deg = int(lon["degree"])
+            lon_min = float(lon["minute"])
+            # self.attrs['lon_deg'] = lon_deg
+            # self.attrs['lon_min'] = lon_min
+            self.attrs["LONGITUDE"] = lon_deg + lon_min / 60.0
+            if lon["hemisphere"] in ["W", "w"]:
+                self.attrs["LONGITUDE"] = -self.attrs["LONGITUDE"]
         except:
             pass
             # self.attrs['LONGITUDE'] = None
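The degree/minute arithmetic above is easy to sanity-check by hand; a small sketch with made-up coordinates (not taken from any sample file):

    lat = {"degree": "10", "minute": "30.00", "hemisphere": "S"}
    LATITUDE = int(lat["degree"]) + float(lat["minute"]) / 60.0
    if lat["hemisphere"] in ["S", "s"]:
        LATITUDE = -LATITUDE
    # LATITUDE == -10.5, i.e. 10 degrees 30 minutes South as a decimal degree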
     def as_DataFrame(self):
-        """ Return the data as a pandas.DataFrame
+        """Return the data as a pandas.DataFrame

-            ATENTION, I should improve this.
+        ATTENTION, I should improve this.
         """
         try:
             import pandas as pd
@@ -539,62 +525,65 @@ def as_DataFrame(self):
                 tmp[self[k].mask] = np.nan
             output[k] = tmp
         output = pd.DataFrame(output)
-        output['LATITUDE'] = self.attrs['LATITUDE']
-        output['LONGITUDE'] = self.attrs['LONGITUDE']
-        if "datetime" in self.attrs.keys():
-            output['datetime_first_scan'] = self.attrs['datetime']
+        output["LATITUDE"] = self.attrs["LATITUDE"]
+        output["LONGITUDE"] = self.attrs["LONGITUDE"]
+        if "datetime" in self.attrs.keys():
+            output["datetime_first_scan"] = self.attrs["datetime"]
         return output

     def check_consistency(self):
-        """ Some consistency checks
+        """Some consistency checks

-            Check if the dataset is consistent with the info from the
-            header.
+        Check if the dataset is consistent with the info from the
+        header.

-            Might be a good idea to move these tests outside the
-            class.
+        Might be a good idea to move these tests outside the
+        class.
         """
-        if 'nquan' in self.attrs:
+        if "nquan" in self.attrs:
             # Check if the number of variables is equal to nquan
-            nquan = int(self.attrs['nquan'])
+            nquan = int(self.attrs["nquan"])
             if nquan != len(self.keys()):
-                module_logger.warning(
-                    "It was supposed to has %s variables." % (nquan))
+                module_logger.warning("It was supposed to have %s variables." % (nquan))

-        if 'nvalues' in self.attrs:
+        if "nvalues" in self.attrs:
             # Check if each variable has nvalues
-            nvalues = int(self.attrs['nvalues'])
+            nvalues = int(self.attrs["nvalues"])
             for k in self.keys():
                 if len(self[k]) != nvalues:
                     module_logger.warning(
-                        ("\033[91m%s was supposed to have %s values, "
-                         "but found only %s.\033[0m") %
-                        (k, nvalues, len(self[k])))
+                        (
+                            "\033[91m%s was supposed to have %s values, "
+                            "but found only %s.\033[0m"
+                        )
+                        % (k, nvalues, len(self[k]))
+                    )


 class fCNV(CNV):
-    """ The same of CNV class, but the input is a filename
-        instead of the straight text.
+    """The same as the CNV class, but the input is a filename
+    instead of the raw text.

-        Input:
-            filename [String]: The path/filename to the CTD file.
+    Input:
+        filename [String]: The path/filename to the CTD file.

-        Output:
-            This class responds as it was a dictionary of variables,
-            and each hash has a Masked Array.
+    Output:
+        This class responds as if it were a dictionary of variables,
+        and each key returns a Masked Array.

-        Check out the doc of the class CNV for more details.
+    Check out the doc of the class CNV for more details.

-        Ex.:
-            profile = fCNV("~/data/CTD.cnv")
-            profile.keys()  # Return the available variables
-            profile.attrs  # Return a dictionary with the file header
-            masked array
+    Ex.:
+        profile = fCNV("~/data/CTD.cnv")
+        profile.keys()  # Return the available variables
+        profile.attrs  # Return a dictionary with the file header
+        Each variable is returned as a masked array.
     """
+
     def __init__(self, filename, defaultsfile=None):
-        module_logger.debug('Initializing fCNV class with file: %s' % filename)
+        module_logger.debug("Initializing fCNV class with file: %s" % filename)

         self.filename = filename
@@ -615,14 +604,14 @@ def __init__(self, filename, defaultsfile=None):
             defaults = None

         try:
-            super(fCNV, self).__init__(text, defaults)
+            super().__init__(text, defaults)
         except CNVError as e:
-            if e.tag == 'noparsingrule':
+            if e.tag == "noparsingrule":
                 e.msg += " File: %s" % self.filename
             raise

-        self.name = 'fCNV'
-        self.attrs['filename'] = os.path.basename(filename)
+        self.name = "fCNV"
+        self.attrs["filename"] = os.path.basename(filename)

     def load_defaults(self, defaultsfile):
         pass
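Taken together, the cnv.py changes above leave the public workflow unchanged; a minimal sketch, assuming a local cast saved as CTD.cnv and pandas installed for the last step:

    from seabird import fCNV

    profile = fCNV("CTD.cnv")    # parse the file and normalize variable names
    profile.check_consistency()  # log warnings if data disagree with the header
    df = profile.as_DataFrame()  # columns include LATITUDE, LONGITUDE and,
                                 # when the header provides it, datetime_first_scan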
diff --git a/seabird/exceptions.py b/seabird/exceptions.py
index 0e99ad8..bbab13f 100644
--- a/seabird/exceptions.py
+++ b/seabird/exceptions.py
@@ -1,14 +1,12 @@
-# -*- coding: utf-8 -*-
-
-
 class CNVError(Exception):
     """Base class for exceptions in this module."""
+
     def __init__(self, tag, msg=None):
         self.tag = tag
         if msg is not None:
             self.msg = msg
-        elif tag == 'noparsingrule':
+        elif tag == "noparsingrule":
             self.msg = "There are no rules able to parse the input."

     def __str__(self):
diff --git a/seabird/netcdf.py b/seabird/netcdf.py
index e7913d3..80c988b 100644
--- a/seabird/netcdf.py
+++ b/seabird/netcdf.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 """ Export the parsed data into a NetCDF following different patterns """
@@ -15,26 +14,28 @@
 except:
     module_logger.warning("netCDF4 is not available.")
+
 def get_cf_attributes(attrs):
-    if 'longname' in attrs:
-        attrs['long_name'] = attrs.pop('longname')
-    value_min = attrs.pop('valuemin') if 'valuemin' in attrs else None
-    value_max = attrs.pop('valuemax') if 'valuemax' in attrs else None
+    if "longname" in attrs:
+        attrs["long_name"] = attrs.pop("longname")
+    value_min = attrs.pop("valuemin") if "valuemin" in attrs else None
+    value_max = attrs.pop("valuemax") if "valuemax" in attrs else None
     if value_min and value_max:
-        attrs['actual_range'] = (value_min, value_max)
+        attrs["actual_range"] = (value_min, value_max)
     return attrs
+
 def cnv2nc(data, filename):
-    """ Save a CNV() object into filename as a NetCDF
+    """Save a CNV() object into filename as a NetCDF

-        To save the CTD.cnv into a NetCDF, just run:
+    To save the CTD.cnv into a NetCDF, just run:

-        profile = cnv.fCNV("CTD.cnv")
-        cnv2nc(profile, "CTD.nc")
+    profile = cnv.fCNV("CTD.cnv")
+    cnv2nc(profile, "CTD.nc")
     """
     logging.info("Saving netcdf output file: %s" % filename)
-    nc = netCDF4.Dataset(filename, 'w', format='NETCDF4')
+    nc = netCDF4.Dataset(filename, "w", format="NETCDF4")

     nc.history = "Created by cnv2nc (PyCNV)"
@@ -52,8 +53,10 @@ def cnv2nc(data, filename):
             module_logger.warning("Failed to write global attribute %s" % a)

     real_values = len(data[data.keys()[0]])
-    if 'nvalue' not in data.attributes:
-        logging.warning('Unknown original data length, nvalues not available within the cnv file.')
+    if "nvalues" not in data.attributes:
+        logging.warning(
+            "Unknown original data length, nvalues not available within the cnv file."
+        )
     elif real_values != int(data.attributes["nvalues"]):
         logging.warning(
             "\033[91mATTENTION '%s' records available differ from nvalues='%s'."
@@ -76,32 +79,38 @@ def cnv2nc(data, filename):
             continue

         # Rename Variable if bad character
-        if '/' in name:
-            nc_name = name.replace('/','Per')
-            logging.info("Replace %s in variable by %s to be compatible with NetCDF" % (name,nc_name))
+        if "/" in name:
+            nc_name = name.replace("/", "Per")
+            logging.info(
+                "Renaming variable {} to {} to be compatible with NetCDF".format(
+                    name, nc_name
+                )
+            )
         else:
             nc_name = name
-
         # Add variable to dataset
-        # handle datetime variables, convert to string format
-        if var.dtype == object and type(var[0]) in (datetime,time,date):
+        # handle datetime variables, convert to string format
+        if var.dtype == object and type(var[0]) in (datetime, time, date):
             str_data = var.data.astype(str)
-            cdf_variables[nc_name] = nc.createVariable(nc_name, "S%g" % len(str_data[0]), ('scan',))
+            cdf_variables[nc_name] = nc.createVariable(
+                nc_name, "S%g" % len(str_data[0]), ("scan",)
+            )
             cdf_variables[nc_name][:] = str_data
         else:
-            cdf_variables[nc_name] = nc.createVariable(nc_name, var.dtype, ('scan',))
+            cdf_variables[nc_name] = nc.createVariable(nc_name, var.dtype, ("scan",))
             cdf_variables[nc_name][:] = var.data
-
         # Ignore unknown fill_value
-        if var.fill_value not in ('?','N/A'):
+        if var.fill_value not in ("?", "N/A"):
             cdf_variables[nc_name].missing_value = var.fill_value

         # Add Attributes
-        for key,value in get_cf_attributes(var.attrs).items():
+        for key, value in get_cf_attributes(var.attrs).items():
             # Ignore name and empty attributes
-            if key in ['name'] or value == None:
+            if key in ["name"] or value is None:
                 continue
-            logging.info("\t\033[93m%s\033[0m: %s" % (key, value))
+            logging.info("\t\033[93m{}\033[0m: {}".format(key, value))
             cdf_variables[nc_name].__setattr__(key, value)

     nc.close()
diff --git a/seabird/qc.py b/seabird/qc.py
index 7bb6899..33cd88d 100644
--- a/seabird/qc.py
+++ b/seabird/qc.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Licensed under a 3-clause BSD style license - see LICENSE.rst

 import logging
@@ -9,28 +8,26 @@
 from . import fCNV
 from .exceptions import CNVError

-module_logger = logging.getLogger('seabird.qc')
+module_logger = logging.getLogger("seabird.qc")


 class fProfileQC(ProfileQC):
-    """ Apply ProfileQC from CoTeDe straight from a file.
-    """
-    def __init__(self, inputfile, cfg=None, saveauxiliary=True, verbose=True,
-                 logger=None):
-        """
-        """
-        self.logger = logging.getLogger(logger or 'seabird.qc.fProfileQC')
-        self.name = 'fProfileQC'
+    """Apply ProfileQC from CoTeDe straight from a file."""
+
+    def __init__(
+        self, inputfile, cfg=None, saveauxiliary=True, verbose=True, logger=None
+    ):
+        """ """
+        self.logger = logging.getLogger(logger or "seabird.qc.fProfileQC")
+        self.name = "fProfileQC"

         try:
             # Not the best way, but will work for now. I should pass
             # the reference for the logger being used.
profile = fCNV(inputfile) except CNVError as e: - self.attributes['filename'] = basename(inputfile) + self.attributes["filename"] = basename(inputfile) logging.error(e.msg) raise - super(fProfileQC, self).__init__( - profile, cfg=cfg, saveauxiliary=saveauxiliary, - verbose=verbose) + super().__init__(profile, cfg=cfg, saveauxiliary=saveauxiliary, verbose=verbose) diff --git a/seabird/rules/refnames.json b/seabird/rules/refnames.json index 641fb07..88fb772 100644 --- a/seabird/rules/refnames.json +++ b/seabird/rules/refnames.json @@ -1,101 +1,101 @@ { "sal00": { - "long_name": "sea_water_practical_salinity", + "long_name": "sea_water_practical_salinity", "name": "PSAL" - }, + }, "sal11": { - "long_name": "secondary_sea_water_practical_salinity", + "long_name": "secondary_sea_water_practical_salinity", "name": "PSAL2" - }, + }, "sbeox0V": { - "long_name": "sea_water_oxygen_voltage", + "long_name": "sea_water_oxygen_voltage", "name": "oxygenvoltage" - }, + }, "density00": { - "long_name": "sea_water_density", + "long_name": "sea_water_density", "name": "density" - }, + }, "density11": { - "long_name": "sea_water_density", + "long_name": "sea_water_density", "name": "density" - }, + }, "t068C": { - "long_name": "sea_water_temperature", + "long_name": "sea_water_temperature", "name": "temperature" - }, + }, "potemp090C": { - "long_name": "sea_water_potential_temperature", + "long_name": "sea_water_potential_temperature", "name": "potemperature" - }, + }, "timeS": { - "units": "seconds", + "units": "seconds", "name": "timeS" - }, + }, "c1S/m": { - "long_name": "secondary_sea_water_electrical_conductivity", + "long_name": "secondary_sea_water_electrical_conductivity", "name": "CNDC2" - }, + }, "t090C": { - "long_name": "sea_water_temperature", + "long_name": "sea_water_temperature", "name": "TEMP" - }, + }, "T090C": { "long_name": "sea_water_temperature", "name": "TEMP" }, "longitude": { - "units": "degrees", + "units": "degrees", "name": "LONGITUDE" - }, + }, "t190C": { - "long_name": "secondary_sea_water_temperature", + "long_name": "secondary_sea_water_temperature", "name": "TEMP2" - }, + }, "dz/dtM": { - "name": "descentrate", + "name": "descentrate", "descentrate": null - }, + }, "c0S/m": { - "long_name": "sea_water_electrical_conductivity", + "long_name": "sea_water_electrical_conductivity", "name": "CNDC" - }, + }, "svCM": { - "name": "soundspeed", + "name": "soundspeed", "longname": "speed_of_sound_in_sea_water" - }, + }, "sigma-t11": { - "long_name": "sea_water_sigma_t", + "long_name": "sea_water_sigma_t", "name": "sigma_t2" - }, + }, "flCUVA": { "name": "fluorescence" - }, + }, "prDM": { - "long_name": "sea_water_pressure", + "long_name": "sea_water_pressure", "name": "PRES" - }, + }, "sbeox0Mm/Kg": { - "long_name": "moles_of_oxygen_per_unit_mass_in_sea_water", + "long_name": "moles_of_oxygen_per_unit_mass_in_sea_water", "name": "oxygen" - }, + }, "potemp190C": { - "long_name": "secondary_sea_water_potential_temperature", + "long_name": "secondary_sea_water_potential_temperature", "name": "potemperature2" - }, + }, "latitude": { - "units": "degrees", + "units": "degrees", "name": "LATITUDE" - }, + }, "sbeox0ML/L": { - "long_name": "volume_fraction_of_oxygen_in_sea_water", + "long_name": "volume_fraction_of_oxygen_in_sea_water", "name": "oxygen_ml_L" - }, + }, "depSM": { - "long_name": "depth", + "long_name": "depth", "name": "DEPTH" - }, + }, "sigma-t00": { - "long_name": "sea_water_sigma_t", + "long_name": "sea_water_sigma_t", "name": "sigma_t" } } diff --git a/seabird/utils.py 
b/seabird/utils.py
index 8c4c482..476f450 100644
--- a/seabird/utils.py
+++ b/seabird/utils.py
@@ -8,12 +8,11 @@
 from seabird.exceptions import CNVError

-module_logger = logging.getLogger('seabird.utils')
+module_logger = logging.getLogger("seabird.utils")


-def make_file_list(inputdir, inputpattern=".*\.cnv"):
-    """ Search inputdir recursively for inputpattern
-    """
+def make_file_list(inputdir, inputpattern=r".*\.cnv"):
+    """Search inputdir recursively for inputpattern"""
     inputfiles = []
     for dirpath, dirnames, filenames in os.walk(inputdir):
         for filename in filenames:
@@ -28,7 +27,7 @@ def basic_logger(logger=None):
         assert type(logger) is logging.Logger
     else:
         # create logger
-        logger = logging.getLogger('CNV logger')
+        logger = logging.getLogger("CNV logger")
         logger.setLevel(logging.DEBUG)

         # create console handler and set level to debug
@@ -37,7 +36,8 @@ def basic_logger(logger=None):

         # create formatter
         formatter = logging.Formatter(
-            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )

         # add formatter to ch
         ch.setFormatter(formatter)
@@ -49,98 +49,108 @@ def basic_logger(logger=None):


 def press2depth(press, latitude):
-    """ calculate depth from pressure
-        http://www.seabird.com/application_notes/AN69.htm
+    """Calculate depth from pressure
+    http://www.seabird.com/application_notes/AN69.htm

-        ATENTION, move it to fluid.
+    ATTENTION, move it to fluid.
     """
     import numpy as np
+
-    x = np.sin((np.pi/180) * latitude / 57.29578)**2
+    x = np.sin((np.pi / 180) * latitude / 57.29578) ** 2
     g = 9.780318 * (1.0 + (5.2788e-3 + 2.36e-5 * x) * x) + 1.092e-6 * press
-    depth = -((((-1.82e-15 * press + 2.279e-10) * press - 2.2512e-5) *
-               press + 9.72659) * press) / g
+    depth = (
+        -(
+            (((-1.82e-15 * press + 2.279e-10) * press - 2.2512e-5) * press + 9.72659)
+            * press
+        )
+        / g
+    )
     return depth


 def load_rule(raw_text):
-    """ Load the adequate rules to parse the data
+    """Load the adequate rules to parse the data

-        It should try all available rules, one by one, and use the one
-        which fits.
+    It should try all available rules, one by one, and use the one
+    which fits.
     """
-    rules_dir = 'rules'
+    rules_dir = "rules"
     rule_files = pkg_resources.resource_listdir(__name__, rules_dir)
-    rule_files = [f for f in rule_files if re.match('^cnv.*\.json$', f)]
+    rule_files = [f for f in rule_files if re.match(r"^cnv.*\.json$", f)]
     for rule_file in rule_files:
         text = pkg_resources.resource_string(
-            __name__, os.path.join(rules_dir, rule_file))
-        rule = json.loads(text.decode('utf-8'))
+            __name__, os.path.join(rules_dir, rule_file)
+        )
+        rule = json.loads(text.decode("utf-8"))
         # Should I load using codec, for UTF8?? Do I need it?
         # f = codecs.open(rule_file, 'r', 'utf-8')
         # rule = yaml.load(f.read())

         # Transitioning for the new rules concept for regexp.
-        if 'sep' in rule:
-            r = rule['header'] + rule['sep'] + rule['data']
+        if "sep" in rule:
+            r = rule["header"] + rule["sep"] + rule["data"]
         else:
-            r = "(?P<header> " + rule['header'] + ")" + \
-                "(?P<data> (?:" + rule['data'] + ")+)"
+            r = (
+                "(?P<header> "
+                + rule["header"]
+                + ")"
+                + "(?P<data> (?:"
+                + rule["data"]
+                + ")+)"
+            )
         content_re = re.compile(r, re.VERBOSE)
         if re.search(r, raw_text, re.VERBOSE):
-            #logging.debug("Using rules from: %s" % rule_file)
-            #self.rule = rule
+            # logging.debug("Using rules from: %s" % rule_file)
+            # self.rule = rule
             parsed = content_re.search(raw_text).groupdict()
             return rule, parsed

     # If a rule hasn't been returned by this point, raise an exception.
-    #logging.error("No rules able to parse it")
-    raise CNVError(tag='noparsingrule')
+    # logging.error("No rules able to parse it")
+    raise CNVError(tag="noparsingrule")


 def seabird_dir(subdir=None):
     """Return the local support/config directory

-        Returns a local directory used to store testing data. The default path
-        (~/.config/seabird) can be overwritten by the environment variable
-        SEABIRD_DIR.
+    Returns a local directory used to store testing data. The default path
+    (~/.config/seabird) can be overwritten by the environment variable
+    SEABIRD_DIR.
     """
-    spath = os.getenv('SEABIRD_DIR', '~/.config/seabird')
-    return os.path.expanduser(spath).replace('/', os.path.sep)
+    spath = os.getenv("SEABIRD_DIR", "~/.config/seabird")
+    return os.path.expanduser(spath).replace("/", os.path.sep)


 def sampledata(filename=None, dtype=None):
     """Return the full path to local sample data

-        The first time it will download the sample data files into the default
-        seabird directory. Check seabird_dir() if you want to modify that.
+    The first time it will download the sample data files into the default
+    seabird directory. Check seabird_dir() if you want to modify that.
     """
     try:
         from supportdata import download_file
     except ImportError:
         module_logger.error(
-            "Missing package supportdata. Try:\npip install supportdata")
+            "Missing package supportdata.
Try:\npip install supportdata" + ) - outputdir = os.path.join(seabird_dir(), 'sampledata') + outputdir = os.path.join(seabird_dir(), "sampledata") if not os.path.exists(outputdir): os.makedirs(outputdir) src = "https://raw.githubusercontent.com/castelao/seabird/dev/sampledata" filesdb = { - 'PIRA001.cnv': { - 'dtype': "CTD", 'md5': "5ded777144300b63c8775b1d7f033f92"}, - 'dPIRX003.cnv': { - 'dtype': "CTD", 'md5': "4b941b902a3aea7d99e1cf4c78c51877"}, - 'dPIRX010.cnv': { - 'dtype': "CTD", 'md5': "8691409accb534c83c8bd412afbdd285"}, - 'Hotin.cnv': { - 'dtype': "CTD", 'md5': "814dc769c0775327bbe5b0f489dfb571"}, - 'missing_whitespace.cnv': { - 'dtype': "CTD", 'md5': "c1f00cebb5f00f6aaebc316bac3fd86a"}, - 'SK287_CTD05.cnv': { - 'dtype': "CTD", 'md5': "08e974c46ed603442eecf9145031a6c4"}, - 'sta0860.cnv': { - 'dtype': "CTD", 'md5': "1c788c4d9b82b527ebf0c2fb9200600e"}, + "PIRA001.cnv": {"dtype": "CTD", "md5": "5ded777144300b63c8775b1d7f033f92"}, + "dPIRX003.cnv": {"dtype": "CTD", "md5": "4b941b902a3aea7d99e1cf4c78c51877"}, + "dPIRX010.cnv": {"dtype": "CTD", "md5": "8691409accb534c83c8bd412afbdd285"}, + "Hotin.cnv": {"dtype": "CTD", "md5": "814dc769c0775327bbe5b0f489dfb571"}, + "missing_whitespace.cnv": { + "dtype": "CTD", + "md5": "c1f00cebb5f00f6aaebc316bac3fd86a", + }, + "SK287_CTD05.cnv": {"dtype": "CTD", "md5": "08e974c46ed603442eecf9145031a6c4"}, + "sta0860.cnv": {"dtype": "CTD", "md5": "1c788c4d9b82b527ebf0c2fb9200600e"}, #'more_after_file_type.cnv': { # 'dtype': "CTD", 'md5': "e5bffcfdcaf52333773bbe7abe98b06d"}, # ['CTD', 'laurynas.cnv', '6f188d53ac2d7aaaf4ce69c0e5c514ec'], @@ -150,30 +160,32 @@ def sampledata(filename=None, dtype=None): # 'dtype': "TSG", 'md5': "d87cea33bfe37e22dc8e563f77cbf307"}, # 'MI18MHDR.btl': { # 'dtype': "btl", 'md5': "775f2a6c6585f1cffb0038111580e5a1"}, - } + } if dtype is not None: - for f in [f for f in filesdb if filesdb[f]['dtype'] != dtype]: - del(filesdb[f]) + for f in [f for f in filesdb if filesdb[f]["dtype"] != dtype]: + del filesdb[f] if filename is None: datafile = [] for f in filesdb: print(f) - download_file(outputdir=outputdir, - url=os.path.join(src, filesdb[f]["dtype"], f), - filename=f, - md5hash=filesdb[f]["md5"]) + download_file( + outputdir=outputdir, + url=os.path.join(src, filesdb[f]["dtype"], f), + filename=f, + md5hash=filesdb[f]["md5"], + ) datafile.append(os.path.join(outputdir, f)) return datafile elif filename in filesdb: - download_file(outputdir=outputdir, - url=os.path.join(src, - filesdb[filename]["dtype"], - filename), - filename=filename, - md5hash=filesdb[filename]["md5"]) + download_file( + outputdir=outputdir, + url=os.path.join(src, filesdb[filename]["dtype"], filename), + filename=filename, + md5hash=filesdb[filename]["md5"], + ) return os.path.join(outputdir, filename) else: diff --git a/setup.cfg b/setup.cfg index 7ca233b..87cb282 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,23 +1,5 @@ -[bumpversion] -current_version = 0.11.5 -commit = True -tag = True - -[bumpversion:file:VERSION] -search = version='{current_version}' -replace = version='{new_version}' - -[bumpversion:file:setup.py] -search = version='{current_version}' -replace = version='{new_version}' - -[bumpversion:file:seabird/__init__.py] -search = __version__ = '{current_version}' -replace = __version__ = '{new_version}' - [wheel] universal = 1 [flake8] exclude = docs - diff --git a/setup.py b/setup.py index a081dfe..ec5ade3 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # Licensed under a 3-clause BSD style license 
- see LICENSE.rst @@ -9,59 +8,16 @@ from distutils.core import setup from codecs import open - -with open('README.rst', encoding='utf-8') as f: - readme = f.read() - -with open('HISTORY.rst', encoding='utf-8') as f: - history = f.read().replace('.. :changelog:', '') - -with open('requirements.txt', encoding='utf-8') as f: - requirements = f.read() - -with open('requirements_dev.txt', encoding='utf-8') as f: - requirements_test = f.read() - setup( - name='seabird', - version='0.11.5', - description="Parser for Sea-Bird's CTD and TSG.", - long_description=readme + '\n\n' + history, author='Guilherme Castelao , Luiz Irber', author_email='guilherme@castelao.net, luiz.irber@gmail.com', - url='http://seabird.castelao.net', packages=[ 'seabird', ], package_dir={'seabird': 'seabird'}, include_package_data=True, - install_requires=requirements, license='3-clause BSD', zip_safe=False, - keywords='oceanography ocean data CTD TSG SeaBird hydrography parser', - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Topic :: Scientific/Engineering', - ], - entry_points={ - 'console_scripts': - ['seabird=seabird.cli:cli'] - }, platforms='any', - extras_require={ - 'test': requirements_test, - 'CDF': ["netCDF4>=1.4"], - 'QC': ["cotede>=0.20.2"] - } ) diff --git a/tests/test_import.py b/tests/test_import.py index 2206086..9ae93cb 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -1,12 +1,11 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- def test_ImportShortcut(): - """ Test shortcut to import CNV & fCNV + """Test shortcut to import CNV & fCNV - CNV & fCNV are actually inside seabird.cnv, but to simplify - I placed a shortcut. + CNV & fCNV are actually inside seabird.cnv, but to simplify + I placed a shortcut. """ from seabird import CNV from seabird import fCNV diff --git a/tests/test_parse.py b/tests/test_parse.py index dfc4da8..f417392 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ Parse some references with fCNV @@ -13,54 +12,53 @@ def test_answer(): - """ Load different .cnv versions with fCNV - """ - datafiles = sampledata() - assert len(datafiles) > 0, \ - "No files available for testing at: %s" % datafiles - for f in datafiles: - print("Loading file: %s" % f) - profile = fCNV(f) - assert len(profile.keys()) > 0 - assert len(profile.attrs.keys()) > 0 + """Load different .cnv versions with fCNV""" + datafiles = sampledata() + assert len(datafiles) > 0, "No files available for testing at: %s" % datafiles + for f in datafiles: + print("Loading file: %s" % f) + profile = fCNV(f) + assert len(profile.keys()) > 0 + assert len(profile.attrs.keys()) > 0 + def test_blank_note_line(): - """ Temporary solution to avoid #37 & #40 + """Temporary solution to avoid #37 & #40 - Guarantee that issues #37 & #40 will not repeat. Maybe change this - to test with load_rule instead? + Guarantee that issues #37 & #40 will not repeat. Maybe change this + to test with load_rule instead? 
""" blank_note_line = "* Sea-Bird SBE9 Data File:\n*\n* \n# name 3 = depSM\n# file_type = ascii\n*END*\n1\n" from seabird.utils import load_rule + load_rule(blank_note_line) def test_column_header(): """Parse with or without the data columns headers - The data columns usually have a header, like: - ... - # file_type = ascii - *END* - Depth Press - 3.973 3.995 - ... + The data columns usually have a header, like: + ... + # file_type = ascii + *END* + Depth Press + 3.973 3.995 + ... - Parse the data even without the column header (Depth & Press). + Parse the data even without the column header (Depth & Press). """ raw = "* Sea-Bird SBE 9 Data File:\n* System UpLoad Time = Aug 01 2011 11:34:32\n# nquan = 2\n# nvalues = 3\n# name 0 = depSM: Depth [salt water, m]\n# name 1 = prDM: Pressure, Digiquartz [db]\n# start_time = Aug 01 2011 11:34:32\n# bad_flag = -9.990e-29\n# datcnv_date = Aug 02 2011 04:16:47, 7.18c\n# file_type = ascii\n*END*\n Depth Press \n 3.973 3.995\n 4.079 4.102\n 3.902 3.924\n" profile = CNV(raw) - assert len(profile['DEPTH']) == 3 - assert profile['DEPTH'][0] == 3.973 + assert len(profile["DEPTH"]) == 3 + assert profile["DEPTH"][0] == 3.973 # Now without the headers - profile = CNV(raw.replace(' Depth Press \n' ,'')) - assert len(profile['DEPTH']) == 3 - assert profile['DEPTH'][0] == 3.973 + profile = CNV(raw.replace(" Depth Press \n", "")) + assert len(profile["DEPTH"]) == 3 + assert profile["DEPTH"][0] == 3.973 def test_empty_lines(): - """Ignore corrupted empty lines - """ + """Ignore corrupted empty lines""" raw = "* Sea-Bird SBE 9 Data File:\n\n* System UpLoad Time = Aug 01 2011 11:34:32\n \n# nquan = 2\n# nvalues = 3\n# name 0 = depSM: Depth [salt water, m]\n# name 1 = prDM: Pressure, Digiquartz [db]\n# start_time = Aug 01 2011 11:34:32\n# bad_flag = -9.990e-29\n# datcnv_date = Aug 02 2011 04:16:47, 7.18c\n# file_type = ascii\n*END*\n Depth Press \n 3.973 3.995\n 4.079 4.102\n 3.902 3.924\n" profile = CNV(raw) diff --git a/tests/test_rules.py b/tests/test_rules.py index 3c07e04..8d514d1 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ Check the rules """ @@ -12,18 +11,18 @@ def test_load_available_rules(): - """ Try to read all available rules + """Try to read all available rules - https://github.com/castelao/seabird/issues/7 + https://github.com/castelao/seabird/issues/7 """ - rules_dir = 'rules' + rules_dir = "rules" rule_files = pkg_resources.resource_listdir(seabird.__name__, rules_dir) - rule_files = [f for f in rule_files if re.match('^(?!refnames).*json$', f)] + rule_files = [f for f in rule_files if re.match("^(?!refnames).*json$", f)] for rule_file in rule_files: print("loading rule: %s", (rule_file)) text = pkg_resources.resource_string( - seabird.__name__, - os.path.join(rules_dir, rule_file)) - rule = json.loads(text.decode('utf-8'), encoding="utf-8") + seabird.__name__, os.path.join(rules_dir, rule_file) + ) + rule = json.loads(text.decode("utf-8")) assert type(rule) == dict assert len(rule.keys()) > 0 diff --git a/tests/test_serialize.py b/tests/test_serialize.py index 4b800bd..2bff5b6 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """ Check if pickle can serialize seabird's data objects @@ -14,13 +13,11 @@ def test_serialize_fCNV(): - """ Serialize fCNV - """ - datafiles = sampledata() - assert len(datafiles) > 0, \ - "No files available for testing at: %s" % datafiles - for f in datafiles: - 
profile = fCNV(f)
-        profile2 = pickle.loads(pickle.dumps(profile))
-        assert profile.attrs == profile2.attrs
-        assert (profile.data == profile.data)
+    """Serialize fCNV"""
+    datafiles = sampledata()
+    assert len(datafiles) > 0, "No files available for testing at: %s" % datafiles
+    for f in datafiles:
+        profile = fCNV(f)
+        profile2 = pickle.loads(pickle.dumps(profile))
+        assert profile.attrs == profile2.attrs
+        assert profile.data == profile2.data
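The round trip exercised by this last test can be reproduced by hand; a minimal sketch, assuming network access the first time sampledata() fetches the file:

    import pickle
    from seabird import fCNV
    from seabird.utils import sampledata

    f = sampledata("dPIRX003.cnv")  # downloaded and cached under ~/.config/seabird
    profile = fCNV(f)
    clone = pickle.loads(pickle.dumps(profile))
    assert profile.attrs == clone.attrs  # header metadata survives serialization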