diff --git a/Project.toml b/Project.toml index e5e2c057..5a909322 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "0.5.13" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" @@ -19,11 +20,12 @@ WeakRefStrings = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" [compat] CategoricalArrays = "0.5,0.6,0.7" DataFrames = "0.18,0.19,0.20" +FilePathsBase = "0.6" +LazyArrays = "0.12" Parsers = "0.3" PooledArrays = "0.5" Tables = "0.1,0.2" WeakRefStrings = "0.5,0.6" -LazyArrays = "0.12" julia = "1" [extras] diff --git a/src/CSV.jl b/src/CSV.jl index 5eebbbfb..8d062dfa 100644 --- a/src/CSV.jl +++ b/src/CSV.jl @@ -3,7 +3,7 @@ module CSV # stdlib using Mmap, Dates, Unicode using Parsers, Tables -using PooledArrays, CategoricalArrays, WeakRefStrings, DataFrames, LazyArrays +using PooledArrays, CategoricalArrays, WeakRefStrings, DataFrames, FilePathsBase, LazyArrays function validate(fullpath::Union{AbstractString,IO}; kwargs...) Base.depwarn("`CSV.validate` is deprecated. `CSV.read` now prints warnings on misshapen files.", :validate) @@ -119,7 +119,7 @@ Read a UTF-8 CSV input (a filename given as a String or FilePaths.jl type, or an Opens the file and uses passed arguments to detect the number of columns and column types, unless column types are provided manually via the `types` keyword argument. Note that passing column types manually can increase performance and reduce the -memory use for each column type provided (column types can be given as a `Vector` for all columns, or specified per column via +memory use for each column type provided (column types can be given as a `Vector` for all columns, or specified per column via name or index in a `Dict`). For text encodings other than UTF-8, see the [StringEncodings.jl](https://github.com/JuliaStrings/StringEncodings.jl) package for re-encoding a file or IO stream. The returned `CSV.File` object supports the [Tables.jl](https://github.com/JuliaData/Tables.jl) interface diff --git a/src/utils.jl b/src/utils.jl index 0694f133..6e61a9bd 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -175,27 +175,25 @@ function slurp(source) return final end +getsource(source::Vector{UInt8}, ::Any) = source +getsource(source::Cmd, ::Any) = Base.read(source) +getsource(source::AbstractPath, ::Any) = Base.read(open(source)) +getsource(source::IO, ::Any) = slurp(source) +getsource(source::SystemPath, use_mmap) = getsource(string(source), use_mmap) function getsource(source, use_mmap) - if source isa Vector{UInt8} - return source - elseif source isa Cmd - return Base.read(source) - elseif use_mmap && !isa(source, IO) - return Mmap.mmap(source) - elseif !isa(source, IO) - m = Mmap.mmap(source) - m2 = Mmap.mmap(Vector{UInt8}, length(m)) - copyto!(m2, 1, m, 1, length(m)) - finalize(m) - return m2 - else - return slurp(source isa IO ? source : open(String(source))) + m = Mmap.mmap(source) + if use_mmap + return m end + m2 = Mmap.mmap(Vector{UInt8}, length(m)) + copyto!(m2, 1, m, 1, length(m)) + finalize(m) + return m2 end getname(buf::Vector{UInt8}) = "" getname(cmd::Cmd) = string(cmd) -getname(str) = String(str) +getname(str) = string(str) getname(io::I) where {I <: IO} = string("<", I, ">") const RESERVED = Set(["local", "global", "export", "let", diff --git a/src/write.jl b/src/write.jl index 9b2577c7..ce6a6c18 100644 --- a/src/write.jl +++ b/src/write.jl @@ -9,7 +9,7 @@ Supported keyword arguments include: * `quotechar::Char='"'`: ascii character to use for quoting text fields that may contain delimiters or newlines * `openquotechar::Char`: instead of `quotechar`, use `openquotechar` and `closequotechar` to support different starting and ending quote characters * `escapechar::Char='"'`: ascii character used to escape quote characters in a text field -* `missingstring::String=""`: string to print for `missing` values +* `missingstring::String=""`: string to print for `missing` values * `dateformat=Dates.default_format(T)`: the date format string to use for printing out `Date` & `DateTime` columns * `append=false`: whether to append writing to an existing file/IO, if `true`, it will not write column names by default * `writeheader=!append`: whether to write an initial row of delimited column names, not written by default if appending @@ -136,7 +136,7 @@ function with(f::Function, io::Union{Base.TTY, Base.Pipe, Base.PipeEndpoint, Bas f(io) end -function with(f::Function, file::String, append) +function with(f::Function, file::Union{AbstractString, AbstractPath}, append) open(file, append ? "a" : "w") do io f(io) end diff --git a/test/runtests.jl b/test/runtests.jl index 6701229c..c84a83b4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using Test, CSV, Dates, Tables, DataFrames, CategoricalArrays, PooledArrays, CodecZlib +using Test, CSV, Dates, Tables, DataFrames, CategoricalArrays, PooledArrays, CodecZlib, FilePathsBase const dir = joinpath(dirname(pathof(CSV)), "..", "test", "testfiles") @@ -75,7 +75,7 @@ end v = f.X[1] @test v == "b" @test levels(v.pool) == ["a", "b", "c"] - + f = CSV.read(IOBuffer("X\nb\nc\na\nc"), categorical=true, copycols=true) v = f.X[1] @test v == "b" diff --git a/test/testfiles.jl b/test/testfiles.jl index dc7075ae..f31ccff5 100644 --- a/test/testfiles.jl +++ b/test/testfiles.jl @@ -1,4 +1,4 @@ -function testfile(file, kwargs, expected_sz, expected_sch, testfunc) +function testfile(file, kwargs, expected_sz, expected_sch, testfunc; dir=dir) println("testing $file") if file isa IO seekstart(file) @@ -24,7 +24,7 @@ function testfile(file, kwargs, expected_sz, expected_sch, testfunc) end testfiles = [ - # file, kwargs, expected_sz, expected_sch, testfunc = + # file, kwargs, expected_sz, expected_sch, testfunc = ("test_utf8_with_BOM.csv", NamedTuple(), (3, 3), NamedTuple{(:col1, :col2, :col3),Tuple{Float64,Float64,Float64}}, @@ -605,3 +605,10 @@ testfiles = [ for test in testfiles testfile(test...) end +# Test file with FilePaths +testfile("test_basic.csv", (types=Dict(2=>Float64),), + (3, 3), + NamedTuple{(:col1, :col2, :col3),Tuple{Int64,Float64,Int64}}, + (col1 = [1, 4, 7], col2 = [2.0, 5.0, 8.0], col3 = [3, 6, 9]); + dir=Path(dir) +) diff --git a/test/write.jl b/test/write.jl index c7c84847..359d3409 100644 --- a/test/write.jl +++ b/test/write.jl @@ -70,6 +70,11 @@ using CSV, Dates, WeakRefStrings, CategoricalArrays, Tables @test String(read(file)) == "col1,col2,col3\n1,4,7\n2,5,8\n3,6,9\n" rm(file) + filepath = Path(file) + (col1=[1,2,3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(filepath) + @test String(read(filepath)) == "col1,col2,col3\n1,4,7\n2,5,8\n3,6,9\n" + rm(filepath) + open(file, "w") do io (col1=[1,2,3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(io) end @@ -165,7 +170,7 @@ using CSV, Dates, WeakRefStrings, CategoricalArrays, Tables # validate char args: #369 @test_throws ArgumentError (col1=[1,2,3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(io; escapechar='☃') - + # custom float decimal: #385 (col1=[1.1,2.2,3.3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(io; delim='\t', decimal=',') @test String(take!(io)) == "col1\tcol2\tcol3\n1,1\t4\t7\n2,2\t5\t8\n3,3\t6\t9\n" @@ -203,4 +208,9 @@ using CSV, Dates, WeakRefStrings, CategoricalArrays, Tables (col1=[""],) |> CSV.write(io) @test String(take!(io)) == "col1\n\n" + # test with FilePath + mktmpdir() do tmp + CSV.write(tmp / "test.txt", df) + @test CSV.read(tmp / "test.txt") == df + end end # @testset "CSV.write"