Skip to content

Commit

Permalink
Ensure FilePaths work (#511)
Browse files Browse the repository at this point in the history
* Ensure FilePaths work

* Use FilePathsBase instead of FilePaths

* Enables mmap on SystemPaths

* Adds test for FilePaths with DataFrame

* Adds compat for FilePathsBase

* Switches to dispatch-based approach
  • Loading branch information
Sam Morrison authored and quinnj committed Oct 19, 2019
1 parent 164888e commit 745bb25
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 25 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.5.13"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
Expand All @@ -19,11 +20,12 @@ WeakRefStrings = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
[compat]
CategoricalArrays = "0.5,0.6,0.7"
DataFrames = "0.18,0.19,0.20"
FilePathsBase = "0.6"
LazyArrays = "0.12"
Parsers = "0.3"
PooledArrays = "0.5"
Tables = "0.1,0.2"
WeakRefStrings = "0.5,0.6"
LazyArrays = "0.12"
julia = "1"

[extras]
Expand Down
4 changes: 2 additions & 2 deletions src/CSV.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module CSV
# stdlib
using Mmap, Dates, Unicode
using Parsers, Tables
using PooledArrays, CategoricalArrays, WeakRefStrings, DataFrames, LazyArrays
using PooledArrays, CategoricalArrays, WeakRefStrings, DataFrames, FilePathsBase, LazyArrays

function validate(fullpath::Union{AbstractString,IO}; kwargs...)
Base.depwarn("`CSV.validate` is deprecated. `CSV.read` now prints warnings on misshapen files.", :validate)
Expand Down Expand Up @@ -119,7 +119,7 @@ Read a UTF-8 CSV input (a filename given as a String or FilePaths.jl type, or an
Opens the file and uses passed arguments to detect the number of columns and column types, unless column types are provided
manually via the `types` keyword argument. Note that passing column types manually can increase performance and reduce the
memory use for each column type provided (column types can be given as a `Vector` for all columns, or specified per column via
memory use for each column type provided (column types can be given as a `Vector` for all columns, or specified per column via
name or index in a `Dict`). For text encodings other than UTF-8, see the [StringEncodings.jl](https://github.com/JuliaStrings/StringEncodings.jl)
package for re-encoding a file or IO stream.
The returned `CSV.File` object supports the [Tables.jl](https://github.com/JuliaData/Tables.jl) interface
Expand Down
28 changes: 13 additions & 15 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -175,27 +175,25 @@ function slurp(source)
return final
end

getsource(source::Vector{UInt8}, ::Any) = source
getsource(source::Cmd, ::Any) = Base.read(source)
getsource(source::AbstractPath, ::Any) = Base.read(open(source))
getsource(source::IO, ::Any) = slurp(source)
getsource(source::SystemPath, use_mmap) = getsource(string(source), use_mmap)
function getsource(source, use_mmap)
if source isa Vector{UInt8}
return source
elseif source isa Cmd
return Base.read(source)
elseif use_mmap && !isa(source, IO)
return Mmap.mmap(source)
elseif !isa(source, IO)
m = Mmap.mmap(source)
m2 = Mmap.mmap(Vector{UInt8}, length(m))
copyto!(m2, 1, m, 1, length(m))
finalize(m)
return m2
else
return slurp(source isa IO ? source : open(String(source)))
m = Mmap.mmap(source)
if use_mmap
return m
end
m2 = Mmap.mmap(Vector{UInt8}, length(m))
copyto!(m2, 1, m, 1, length(m))
finalize(m)
return m2
end

getname(buf::Vector{UInt8}) = "<raw buffer>"
getname(cmd::Cmd) = string(cmd)
getname(str) = String(str)
getname(str) = string(str)
getname(io::I) where {I <: IO} = string("<", I, ">")

const RESERVED = Set(["local", "global", "export", "let",
Expand Down
4 changes: 2 additions & 2 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Supported keyword arguments include:
* `quotechar::Char='"'`: ascii character to use for quoting text fields that may contain delimiters or newlines
* `openquotechar::Char`: instead of `quotechar`, use `openquotechar` and `closequotechar` to support different starting and ending quote characters
* `escapechar::Char='"'`: ascii character used to escape quote characters in a text field
* `missingstring::String=""`: string to print for `missing` values
* `missingstring::String=""`: string to print for `missing` values
* `dateformat=Dates.default_format(T)`: the date format string to use for printing out `Date` & `DateTime` columns
* `append=false`: whether to append writing to an existing file/IO, if `true`, it will not write column names by default
* `writeheader=!append`: whether to write an initial row of delimited column names, not written by default if appending
Expand Down Expand Up @@ -136,7 +136,7 @@ function with(f::Function, io::Union{Base.TTY, Base.Pipe, Base.PipeEndpoint, Bas
f(io)
end

function with(f::Function, file::String, append)
function with(f::Function, file::Union{AbstractString, AbstractPath}, append)
open(file, append ? "a" : "w") do io
f(io)
end
Expand Down
4 changes: 2 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using Test, CSV, Dates, Tables, DataFrames, CategoricalArrays, PooledArrays, CodecZlib
using Test, CSV, Dates, Tables, DataFrames, CategoricalArrays, PooledArrays, CodecZlib, FilePathsBase

const dir = joinpath(dirname(pathof(CSV)), "..", "test", "testfiles")

Expand Down Expand Up @@ -75,7 +75,7 @@ end
v = f.X[1]
@test v == "b"
@test levels(v.pool) == ["a", "b", "c"]

f = CSV.read(IOBuffer("X\nb\nc\na\nc"), categorical=true, copycols=true)
v = f.X[1]
@test v == "b"
Expand Down
11 changes: 9 additions & 2 deletions test/testfiles.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function testfile(file, kwargs, expected_sz, expected_sch, testfunc)
function testfile(file, kwargs, expected_sz, expected_sch, testfunc; dir=dir)
println("testing $file")
if file isa IO
seekstart(file)
Expand All @@ -24,7 +24,7 @@ function testfile(file, kwargs, expected_sz, expected_sch, testfunc)
end

testfiles = [
# file, kwargs, expected_sz, expected_sch, testfunc =
# file, kwargs, expected_sz, expected_sch, testfunc =
("test_utf8_with_BOM.csv", NamedTuple(),
(3, 3),
NamedTuple{(:col1, :col2, :col3),Tuple{Float64,Float64,Float64}},
Expand Down Expand Up @@ -605,3 +605,10 @@ testfiles = [
for test in testfiles
testfile(test...)
end
# Test file with FilePaths
testfile("test_basic.csv", (types=Dict(2=>Float64),),
(3, 3),
NamedTuple{(:col1, :col2, :col3),Tuple{Int64,Float64,Int64}},
(col1 = [1, 4, 7], col2 = [2.0, 5.0, 8.0], col3 = [3, 6, 9]);
dir=Path(dir)
)
12 changes: 11 additions & 1 deletion test/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ using CSV, Dates, WeakRefStrings, CategoricalArrays, Tables
@test String(read(file)) == "col1,col2,col3\n1,4,7\n2,5,8\n3,6,9\n"
rm(file)

filepath = Path(file)
(col1=[1,2,3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(filepath)
@test String(read(filepath)) == "col1,col2,col3\n1,4,7\n2,5,8\n3,6,9\n"
rm(filepath)

open(file, "w") do io
(col1=[1,2,3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(io)
end
Expand Down Expand Up @@ -165,7 +170,7 @@ using CSV, Dates, WeakRefStrings, CategoricalArrays, Tables

# validate char args: #369
@test_throws ArgumentError (col1=[1,2,3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(io; escapechar='☃')

# custom float decimal: #385
(col1=[1.1,2.2,3.3], col2=[4,5,6], col3=[7,8,9]) |> CSV.write(io; delim='\t', decimal=',')
@test String(take!(io)) == "col1\tcol2\tcol3\n1,1\t4\t7\n2,2\t5\t8\n3,3\t6\t9\n"
Expand Down Expand Up @@ -203,4 +208,9 @@ using CSV, Dates, WeakRefStrings, CategoricalArrays, Tables
(col1=[""],) |> CSV.write(io)
@test String(take!(io)) == "col1\n\n"

# test with FilePath
mktmpdir() do tmp
CSV.write(tmp / "test.txt", df)
@test CSV.read(tmp / "test.txt") == df
end
end # @testset "CSV.write"

0 comments on commit 745bb25

Please sign in to comment.