Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regular maintenance 202405 #60

Merged
merged 4 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,12 @@ jobs:
poe check

- name: Upload coverage to Codecov
- uses: codecov/codecov-action@v3
+ uses: codecov/codecov-action@v4
+ env:
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
files: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
name: codecov-umbrella
- fail_ci_if_error: false
+ fail_ci_if_error: true
13 changes: 13 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# https://docs.codecov.io/docs/common-recipe-list
# https://docs.codecov.io/docs/commit-status#patch-status

coverage:
status:

project:
default:
target: 80%

patch:
default:
informational: true
4 changes: 4 additions & 0 deletions doc/backlog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ Next steps
- [o] Option to suppress ``NOT NULL`` constraint. /cc @seut
- [o] Different kinds of sampling methods? /cc @seut
- [o] Performance considerations / HTTP server
- [o] Look at JSON Schema for DDL definition.

- https://pypi.org/project/JSONSchema2DB/
- https://pypi.org/project/jsonschema2ddl/

Formats
=======
Expand Down
1 change: 1 addition & 0 deletions examples/infer_ndjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- https://en.wikipedia.org/wiki/JSON_streaming
- https://github.com/ndjson/ndjson.github.io/issues/1#issuecomment-109935996
"""

import io
import typing as t

Expand Down
69 changes: 33 additions & 36 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,6 @@ authors = [
{ name = "Richard Pobering", email = "[email protected]" },
]
requires-python = ">=3.8"
- dependencies = [
- "click<9",
- "colorama<1",
- "crash",
- "crate[sqlalchemy]",
- "ddlgenerator<0.2",
- "frictionless[excel,json,ods,parquet,sql]<5.6",
- "fsspec[gcs,github,http,s3]==2024.3.1",
- "json_stream<3",
- "line-protocol-parser<2",
- "odfpy<2",
- "pandas<1.6",
- "requests<2.32",
- "sql-formatter<0.7",
- "sqlmakeuper<0.2",
- "urllib3<2",
- ]
- dynamic = [
- "version",
- ]
classifiers = [
"Development Status :: 3 - Alpha",
"Environment :: Console",
Expand All @@ -79,10 +59,12 @@ classifiers = [
"Operating System :: POSIX :: Linux",
"Operating System :: Unix",
"Programming Language :: Python",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Communications",
"Topic :: Database",
"Topic :: Documentation",
Expand All @@ -99,14 +81,33 @@ classifiers = [
"Topic :: Text Processing",
"Topic :: Utilities",
]
+ dynamic = [
+ "version",
+ ]
+ dependencies = [
+ "click<9",
+ "colorama<1",
+ "crash",
+ "crate[sqlalchemy]",
+ "ddlgenerator<0.2",
+ "frictionless[excel,json,ods,parquet,sql]<5.6",
+ "fsspec[gcs,github,http,s3]==2024.3.1",
+ "json_stream<3",
+ "line-protocol-parser<2",
+ "odfpy<2",
+ "pandas<2",
+ "requests<2.32",
+ "sql-formatter<0.7",
+ "sqlmakeuper<0.2",
+ "urllib3<2",
+ ]
[project.optional-dependencies]
develop = [
"black<24",
"hunter<4",
- "mypy==1.8.0",
+ "mypy<1.11",
"poethepoet<0.27",
- "pyproject-fmt<1.8",
- "ruff==0.1.14",
+ "pyproject-fmt<1.9",
+ "ruff<0.5",
"types-requests<2.32",
"validate-pyproject<0.17",
]
Expand All @@ -126,17 +127,14 @@ test = [
"pytest-cov<5",
"pytest-timeout<3",
]

[project.urls]
changelog = "https://github.com/daq-tools/skeem/blob/main/CHANGES.rst"
documentation = "https://github.com/daq-tools/skeem"
homepage = "https://github.com/daq-tools/skeem"
repository = "https://github.com/daq-tools/skeem"

[project.scripts]
skeem = "skeem.cli:cli"


[tool.black]
line-length = 120

Expand Down Expand Up @@ -177,11 +175,10 @@ markers = [
"slow",
]


[tool.ruff]
line-length = 120

- select = [
+ lint.select = [
# Bandit
"S",
# Bugbear
Expand Down Expand Up @@ -209,7 +206,7 @@ select = [
"RET",
]

- extend-ignore = [
+ lint.extend-ignore = [
# zip() without an explicit strict= parameter
"B905",
# df is a bad variable name. Be kinder to your future self.
Expand All @@ -220,7 +217,7 @@ extend-ignore = [
"RET505",
]

- [tool.ruff.per-file-ignores]
+ [tool.ruff.lint.per-file-ignores]
"skeem/core.py" = ["E402"]
"skeem/fastparquet/*" = ["E722"]
"skeem/pandas/*" = ["S101"]
Expand All @@ -246,15 +243,15 @@ check = [
]

format = [
- { cmd = "black ." },
+ { cmd = "ruff format" },
# Configure Ruff not to auto-fix (remove!) unused variables (F841) and `print` statements (T201).
- { cmd = "ruff --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 ." },
- { cmd = "pyproject-fmt pyproject.toml" },
+ { cmd = "ruff check --fix --ignore=ERA --ignore=F401 --ignore=F841 --ignore=T20 ." },
+ { cmd = "pyproject-fmt --keep-full-version pyproject.toml" },
]

lint = [
- { cmd = "ruff ." },
- { cmd = "black --check ." },
+ { cmd = "ruff format --check" },
+ { cmd = "ruff check ." },
{ cmd = "validate-pyproject pyproject.toml" },
{ cmd = "mypy" },
]
Expand Down
7 changes: 6 additions & 1 deletion skeem/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
from skeem.settings import PEEK_BYTES, PEEK_LINES
from skeem.type import ContentType, ContentTypeGroup

+ if t.TYPE_CHECKING:
+ import xarray

Check warning on line 17 in skeem/io.py

View check run for this annotation

Codecov / codecov/patch

skeem/io.py#L17

Added line #L17 was not covered by tests


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -125,7 +129,8 @@


def dataset_to_dataframe(
- ds: "xarray.Dataset", peek_lines: int # type: ignore[name-defined] # noqa: F821
+ ds: "xarray.Dataset",
+ peek_lines: int,
) -> pd.DataFrame:
logger.info(f"Dataset:\n{ds}")
df = ds.to_dataframe().dropna()
Expand Down
3 changes: 1 addition & 2 deletions skeem/pandas/io_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,7 @@ def is_fsspec_url(url: t.Union[FilePath, BaseBuffer]) -> bool:
something fsspec can handle
"""
return (
- isinstance(url, str)
- and bool(_RFC_3986_PATTERN.match(url))
+ isinstance(url, str) and bool(_RFC_3986_PATTERN.match(url))
# PATCH for Skeem: Let HTTP requests also be handled by `fsspec`.
# and not url.startswith(("http://", "https://"))
)