Skip to content

Commit

Permalink
Add support for parsing Time types natively. Fixes #459
Browse files Browse the repository at this point in the history
  • Loading branch information
quinnj committed Jun 20, 2019
1 parent 7da7077 commit abda2e8
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 28 deletions.
8 changes: 5 additions & 3 deletions src/CSV.jl
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,9 @@ function file(source,
pool = pool === true ? 1.0 : pool isa Float64 ? pool : 0.0
refs = Vector{Dict{String, UInt64}}(undef, ncols)
lastrefs = zeros(UInt64, ncols)
t = time()
t = Base.time()
rows, tapes = parsetape(Val(transpose), IG, ncols, gettypecodes(typemap), tapes, tapelen, buf, datapos, len, limit, cmt, positions, pool, refs, lastrefs, rowsguess, typecodes, debug, options)
debug && println("time for initial parsing to tape: $(time() - t)")
debug && println("time for initial parsing to tape: $(Base.time() - t)")
for i = 1:ncols
typecodes[i] &= ~USER
end
Expand Down Expand Up @@ -349,6 +349,8 @@ function parsetape(::Val{transpose}, ignoreemptylines, ncols, typemap, tapes, ta
pos, code = parsevalue!(Date, T, tape, tapeidx, buf, pos, len, options, row, col, typecodes)
elseif type === DATETIME
pos, code = parsevalue!(DateTime, T, tape, tapeidx, buf, pos, len, options, row, col, typecodes)
elseif type === TIME
pos, code = parsevalue!(Time, T, tape, tapeidx, buf, pos, len, options, row, col, typecodes)
elseif type === BOOL
pos, code = parsevalue!(Bool, T, tape, tapeidx, buf, pos, len, options, row, col, typecodes)
elseif type === POOL
Expand Down Expand Up @@ -476,7 +478,7 @@ function detect(tape, tapeidx, buf, pos, len, options, row, col, typemap, pool,
if Parsers.ok(code)
setposlen!(tape, tapeidx, code, vpos, vlen)
@inbounds tape[tapeidx + 1] = uint64(dt)
@inbounds typecodes[col] = DT == Date ? (T == MISSINGTYPE ? (DATE | MISSING) : DATE) : (T == MISSINGTYPE ? (DATETIME | MISSING) : DATETIME)
@inbounds typecodes[col] = DT == Date ? (T == MISSINGTYPE ? (DATE | MISSING) : DATE) : DT == DateTime ? (T == MISSINGTYPE ? (DATETIME | MISSING) : DATETIME) : (T == MISSINGTYPE ? (TIME | MISSING) : TIME)
@goto done
end
end
Expand Down
18 changes: 4 additions & 14 deletions src/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,26 +45,16 @@ end
return getvalue(Base.nonmissingtype(T), f, ind, offlen, col)
end

function getvalue(::Type{Int64}, f, indexoffset, offlen, col)
@inbounds x = int64(gettape(f, col)[indexoffset + 1])
# Generic scalar getter. Looks up the decoder registered for `T` via
# `reinterp_func`, applies it to the element at index `indexoffset + 1` of
# `gettape(f, col)`, and returns the decoded value.
# `offlen` is not consulted by this method.
function getvalue(::Type{T}, f, indexoffset, offlen, col) where {T}
    decode = reinterp_func(T)
    # keep both the tape lookup and the decode call under @inbounds, as before
    @inbounds value = decode(gettape(f, col)[indexoffset + 1])
    return value
end

# Float64 getter. The element at index `indexoffset + 1` of `gettape(f, col)`
# is decoded two ways: via `int64` and then converted to Float64, and via
# `float64` directly. `intvalue(offlen)` picks which decoding is returned;
# `ifelse` evaluates both candidates before choosing.
function getvalue(::Type{Float64}, f, indexoffset, offlen, col)
    @inbounds bits = gettape(f, col)[indexoffset + 1]
    stored_as_int = intvalue(offlen)
    from_int = Float64(int64(bits))
    from_float = float64(bits)
    return ifelse(stored_as_int, from_int, from_float)
end
# Date getter: decode the element at index `indexoffset + 1` of
# `gettape(f, col)` with `date`. `offlen` is not consulted.
function getvalue(::Type{Date}, f, indexoffset, offlen, col)
    return @inbounds date(gettape(f, col)[indexoffset + 1])
end
# DateTime getter: decode the element at index `indexoffset + 1` of
# `gettape(f, col)` with `datetime`. `offlen` is not consulted.
function getvalue(::Type{DateTime}, f, indexoffset, offlen, col)
    return @inbounds datetime(gettape(f, col)[indexoffset + 1])
end
# Bool getter: decode the element at index `indexoffset + 1` of
# `gettape(f, col)` with `bool`. `offlen` is not consulted.
function getvalue(::Type{Bool}, f, indexoffset, offlen, col)
    return @inbounds bool(gettape(f, col)[indexoffset + 1])
end

# Missing getter: none of the arguments is consulted; the result is always
# `missing`.
function getvalue(::Type{Missing}, f, indexoffset, offlen, col)
    return missing
end

function getvalue(::Type{PooledString}, f, indexoffset, offlen, col)
Expand Down
6 changes: 0 additions & 6 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,6 @@ function Base.copy(c::Column{T, S}) where {T <: Union{String, Union{String, Miss
return A
end

# Dispatch table from an element type to the function that decodes a stored
# element into that type. Each method simply returns the function named on its
# right-hand side; callers apply the result to the stored element, e.g.
# `reinterp_func(T)(x)`.
reinterp_func(::Type{Int64}) = int64
reinterp_func(::Type{Float64}) = float64
reinterp_func(::Type{Date}) = date
reinterp_func(::Type{DateTime}) = datetime
reinterp_func(::Type{Bool}) = bool

@inline Base.@propagate_inbounds function Base.getindex(c::Column{Missing}, row::Int)
@boundscheck checkbounds(c, row)
return missing
Expand Down
21 changes: 16 additions & 5 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,25 @@ const INT = 0b00000010 % TypeCode
const FLOAT = 0b00000011 % TypeCode
const DATE = 0b00000100 % TypeCode
const DATETIME = 0b00000101 % TypeCode
const BOOL = 0b00000110 % TypeCode
const STRING = 0b00000111 % TypeCode
const POOL = 0b00001000 % TypeCode
# Type codes for Time, Bool, String and pooled-string columns.
# POOL is a single bit (0b00010000) that none of the other three codes set,
# which is what lets `pooled` detect it with a mask.
const TIME = 0b00000110 % TypeCode
const BOOL = 0b00000111 % TypeCode
const STRING = 0b00001000 % TypeCode
const POOL = 0b00010000 % TypeCode
# true when the POOL bit is set in `x`, regardless of any other bits
pooled(x::TypeCode) = (x & POOL) == POOL

# a user-provided type; a mask that can be combined w/ basic types
# (a single bit, 0b00100000, OR-ed onto a type code)
const USER = 0b00100000 % TypeCode
# true when the USER bit is set in `x`, regardless of any other bits
user(x::TypeCode) = (x & USER) === USER

const TYPEBITS = 0b00001111 % TypeCode
# Mask selecting the low five bits of a TypeCode. The USER bit (0b00100000)
# lies outside this mask, so `typebits` strips it.
const TYPEBITS = 0b00011111 % TypeCode
# keep only the bits of `x` covered by TYPEBITS
typebits(x::TypeCode) = x & TYPEBITS

# Map a Julia type to its TypeCode constant, one method per supported type.
# Dispatch picks the most specific method, so `Bool` resolves to BOOL even
# though `Bool <: Integer` also matches the INT method.
# NOTE(review): the PooledString method presumably plays the same role against
# the AbstractString method; confirm PooledString's supertype at its definition.
typecode(::Type{Missing}) = MISSINGTYPE
typecode(::Type{<:Integer}) = INT
typecode(::Type{<:AbstractFloat}) = FLOAT
typecode(::Type{Date}) = DATE
typecode(::Type{DateTime}) = DATETIME
typecode(::Type{Time}) = TIME
typecode(::Type{Bool}) = BOOL
typecode(::Type{<:AbstractString}) = STRING
typecode(::Type{PooledString}) = POOL
Expand All @@ -76,6 +78,7 @@ const TYPECODES = Dict(
FLOAT => Float64,
DATE => Date,
DATETIME => DateTime,
TIME => Time,
BOOL => Bool,
POOL => PooledString,
STRING => String,
Expand Down Expand Up @@ -110,14 +113,22 @@ float64(x::UInt64) = Core.bitcast(Float64, x)
# Decode a stored word as Bool: exactly 1 is `true`, any other value is `false`.
bool(x::UInt64) = x == one(UInt64)
# Decoders for the Dates types. Each passes the stored word through `int64`
# and rebuilds the value from the resulting integer: wrapped in `Dates.UTD`
# for Date, in `Dates.UTM` for DateTime, and in `Nanosecond` for Time.
# NOTE(review): this `time` appears to be a module-local function rather than
# a method of `Base.time` (the wall-clock calls in this commit are spelled
# `Base.time()`); confirm there is no `import Base: time` in the module.
date(x::UInt64) = Date(Dates.UTD(int64(x)))
datetime(x::UInt64) = DateTime(Dates.UTM(int64(x)))
time(x::UInt64) = Time(Nanosecond(int64(x)))
# Keep only the low 32 bits of the stored word, as a UInt32.
ref(x::UInt64) = x % UInt32

# Encoders producing the UInt64 word for a value.
# Int64 and Float64 keep their exact bit pattern; Bool becomes 0 or 1.
uint64(x::Int64) = reinterpret(UInt64, x)
uint64(x::Float64) = reinterpret(UInt64, x)
uint64(x::Bool) = x ? one(UInt64) : zero(UInt64)
uint64(x::Union{Date, DateTime}) = uint64(Dates.value(x))
# Date, DateTime and Time are encoded through their integer `Dates.value`,
# which is then handed back to `uint64`.
function uint64(x::Union{Date, DateTime, Time})
    return uint64(Dates.value(x))
end
# UInt32 is widened to UInt64 with its value unchanged.
uint64(x::UInt32) = x % UInt64

# Dispatch table from an element type to the function that decodes a stored
# element into that type. Each method simply returns the function named on its
# right-hand side; callers apply the result to the stored element, e.g.
# `reinterp_func(T)(x)`. A type without a method here raises a MethodError.
reinterp_func(::Type{Int64}) = int64
reinterp_func(::Type{Float64}) = float64
reinterp_func(::Type{Date}) = date
reinterp_func(::Type{DateTime}) = datetime
reinterp_func(::Type{Time}) = time
reinterp_func(::Type{Bool}) = bool

@noinline function consumeBOM!(source)
# BOM character detection
startpos = pos = 1
Expand Down
5 changes: 5 additions & 0 deletions test/basics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ rows = collect(f)
@test rows[1].a == 1
@test rows[1].a == 1

# 459: a column of H:M:S values is read as `Time` when `dateformat="H:M:S"`
# is passed; time.csv holds the rows 00:00:00 and 00:10:00.
rows = collect(CSV.File(joinpath(dir, "time.csv"); dateformat="H:M:S"))
@test rows[1].time == Time(0)
@test rows[2].time == Time(0, 10)

# 388
df = CSV.read(joinpath(dir, "GSM2230757_human1_umifm_counts.csv"))
@test size(df) == (3, 20128)
Expand Down
6 changes: 6 additions & 0 deletions test/testfiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,12 @@ testfiles = [
NamedTuple{(:SampleID, :Mother_Child, :SubjectID, :MaternalID, :TimePoint, :Fecal_EtOH, :CollectionRep, :DOC, :RAInitials_DOC, :DOF, :RAInitials_DOF, :Date_Brought_In, :RAInitials_Brought, :Date_Shipped, :RAInitials_Shipped, :Date_Aliquoted, :Number_Replicates, :RAInitials_Aliquot, :StorageBox, :DOE, :Extract_number, :AliquotRep, :DNABox, :KitUsed, :RAInitials_Extract, :DNAConc, :DOM, :Mgx_processed, :Mgx_batch, :DO16S, :_16S_processed, :_16S_batch, :_16S_plate, :Notes, :Discrepancies, :Batch_1_Mapping, :Mgx_batch_Mapping, :_16S_batch_Mapping, :Mother_Child_Dyads), Tuple{String, String, Int64, String, Int64, String, Int64, Dates.Date, Union{Missing, String}, Union{Missing, Date}, Union{Missing, String}, Union{Missing, Date}, Union{Missing, String}, Union{Missing, Date}, Union{Missing, String}, Union{Missing, Date}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, Date}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, Float64}, Union{Missing, Date}, Union{Missing, String}, Union{Missing, String}, Union{Missing, Date}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, String}, Union{Missing, Int64}}},
nothing
),
# 459, support parsing Time
("time.csv", (dateformat="H:M:S",),
(2, 2),
NamedTuple{(:time, :value), Tuple{Time, Int64}},
(time = [Time(0), Time(0, 10)], value = [1, 2])
)
];

for test in testfiles
Expand Down
3 changes: 3 additions & 0 deletions test/testfiles/time.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
time,value
00:00:00,1
00:10:00,2

0 comments on commit abda2e8

Please sign in to comment.