Skip to content

Commit

Permalink
Merge pull request #58 from JuliaComputing/backports-0.2.8
Browse files Browse the repository at this point in the history
Backports for 0.2.8
  • Loading branch information
mortenpi authored Nov 29, 2022
2 parents 09efe3d + e8f153d commit 6c06e3a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 34 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DataSets"
uuid = "c9661210-8a83-48f0-b833-72e62abce419"
authors = ["Chris Foster <[email protected]> and contributors"]
version = "0.2.7"
version = "0.2.8"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
43 changes: 24 additions & 19 deletions src/DataSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,21 +94,20 @@ separated with forward slashes. Examples:
organization-dataset_name/project/data
"""
function check_dataset_name(name::AbstractString)
# DataSet names disallow most punctuation for now, as it may be needed as
# delimiters in data-related syntax (eg, for the data REPL).
dataset_name_pattern = r"
^
[[:alpha:]]
(?:
[-[:alnum:]_] |
/ (?=[[:alpha:]])
)*
$
"x
if !occursin(dataset_name_pattern, name)
if !occursin(DATASET_NAME_REGEX, name)
error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.")
end
end
# DataSet names disallow most punctuation for now, as it may be needed as
# delimiters in data-related syntax (eg, for the data REPL).
#
# `DATASET_NAME_REGEX_STRING` is an unanchored pattern fragment written for
# PCRE extended ("x") mode, so the whitespace and newlines inside it are not
# significant. A name is a letter followed by any number of letters, digits,
# `-` or `_`; a `/` is accepted only when it is directly followed by a letter.
const DATASET_NAME_REGEX_STRING = raw"""
[[:alpha:]]
(?:
[-[:alnum:]_] |
/ (?=[[:alpha:]])
)*
"""
# Anchored form used to validate a complete name. The end is anchored with `\z`
# rather than `$`, because `$` also matches just before a trailing newline and
# would therefore accept names such as "abc\n".
const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\\z", "x")

# Hacky thing until we figure out which fields DataSet should actually have.
function Base.getproperty(d::DataSet, name::Symbol)
Expand Down Expand Up @@ -254,16 +253,22 @@ function _unescapeuri(str)
return String(take!(out))
end

# Parse as a suffix of URI syntax
# name/of/dataset?param1=value1&param2=value2#fragment
#
# Capture groups: 1 = dataset name, 2 = query string (without the `?`),
# 3 = fragment (without the `#`). Groups 2 and 3 do not participate in the
# match when the corresponding part is absent.
# The end is anchored with `\z` rather than `$`, since `$` would also match
# just before a trailing newline and let specs such as "foo\n" through.
const DATASET_SPEC_REGEX = Regex(
    """
    ^
    ($(DATASET_NAME_REGEX_STRING))
    (?:\\?([^#]*))? # query - a=b&c=d
    (?:\\#(.*))? # fragment - ...
    \\z
    """,
    "x",
)
function _split_dataspec(spec::AbstractString)
# Parse as a suffix of URI syntax
# name/of/dataset?param1=value1&param2=value2#fragment
m = match(r"
^
((?:[[:alpha:]][[:alnum:]_]*/?)+) # name - a/b/c
(?:\?([^#]*))? # query - a=b&c=d
(?:\#(.*))? # fragment - ...
$"x,
spec)
m = match(DATASET_SPEC_REGEX, spec)
if isnothing(m)
return nothing, nothing, nothing
end
Expand Down
42 changes: 28 additions & 14 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,23 +92,37 @@ end

#-------------------------------------------------------------------------------
# Exercises DataSets.check_dataset_name on accepted and rejected names, and
# checks that DataSets._split_dataspec gives the matching result for a bare name.
@testset "Data set name parsing" begin
# Valid names
@test DataSets.check_dataset_name("a_b") === nothing
@test DataSets.check_dataset_name("a1") === nothing
@test DataSets.check_dataset_name("δεδομένα") === nothing
@test DataSets.check_dataset_name("a/b") === nothing
@test DataSets.check_dataset_name("a/b/c") === nothing
@test DataSets.check_dataset_name("a-b-c-") === nothing
# Invalid names
# The first case pins the exact error message; the rest only check the exception type.
@test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") DataSets.check_dataset_name("a?b")
@test_throws ErrorException DataSets.check_dataset_name("1")
@test_throws ErrorException DataSets.check_dataset_name("a b")
@test_throws ErrorException DataSets.check_dataset_name("a.b")
@test_throws ErrorException DataSets.check_dataset_name("a/b/")
@test_throws ErrorException DataSets.check_dataset_name("/a/b")
# Each valid name must pass validation and must also parse as a dataspec that
# has the same name and no query or fragment.
@testset "Valid name: $name" for name in (
"a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c", "a-", "b_",
)
@test DataSets.check_dataset_name(name) === nothing
@test DataSets._split_dataspec(name) == (name, nothing, nothing)
end

# Each invalid name must be rejected by validation and must not parse as a
# dataspec either.
@testset "Invalid name: $name" for name in (
"1", "a b", "a.b", "a/b/", "a//b", "/a/b", "a/-", "a/1", "a/ _/b"
)
@test_throws ErrorException DataSets.check_dataset_name(name)
@test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
end
end

@testset "URL-like dataspec parsing" begin
# Valid dataspecs
# Every comparison is wrapped in `@test`; a bare `==` expression is evaluated
# and discarded, so it would never fail the test suite.
@test DataSets._split_dataspec("foo?x=1#f") == ("foo", ["x" => "1"], "f")
@test DataSets._split_dataspec("foo#f") == ("foo", nothing, "f")
@test DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
# `-` is a legal name character after the first letter, so this spec parses.
@test DataSets._split_dataspec("foo-?x=1") == ("foo-", ["x" => "1"], nothing)
# Invalid dataspecs
@test DataSets._split_dataspec("foo ?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("foo\n?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("foo\nbar?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec(" foo?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("1?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("foo #f") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("@?x=1") == (nothing, nothing, nothing)

proj = DataSets.load_project("Data.toml")

@test !haskey(dataset(proj, "a_text_file"), "dataspec")
Expand Down

2 comments on commit 6c06e3a

@mortenpi
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register branch=release-0.2

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/73070

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.2.8 -m "<description of version>" 6c06e3a93fa7c052e587479ea52e5697c169f3b7
git push origin v0.2.8

Please sign in to comment.