Skip to content

Commit

Permalink
correctly parse all integer types. fixes #9289
Browse files Browse the repository at this point in the history
Used parseint instead of float64_isvalid.
  • Loading branch information
tanmaykm committed Dec 14, 2014
1 parent 795d7e0 commit 5ed0a6b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 13 deletions.
50 changes: 37 additions & 13 deletions base/datafmt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,14 @@ type DLMStore{T,S<:AbstractString} <: DLMHandler
sbuff::S
auto::Bool
eol::Char
tmp64::Array{Float64,1}
end

# Outer constructor for DLMStore{T,S}.
#
# `dims` is (nrows, ncols). Raises an error ("Empty input") when either
# dimension is zero and ("Invalid dimensions") when either is negative.
# When `has_header` is true one row is reserved for the header, so the T-typed
# array is allocated with nrows-1 rows; otherwise it gets all nrows rows.
function DLMStore{T,S<:AbstractString}(::Type{T}, dims::NTuple{2,Integer}, has_header::Bool, sbuff::S, auto::Bool, eol::Char)
(nrows,ncols) = dims
((nrows == 0) || (ncols == 0)) && error("Empty input")
((nrows < 0) || (ncols < 0)) && error("Invalid dimensions")
# number of leading rows that do not go into the data array (0 or 1)
hdr_offset = has_header ? 1 : 0
# Arguments passed below: a 1 x ncols array filled with empty SubStrings of
# sbuff, an Array(T, nrows-hdr_offset, ncols), the dimensions, two zeros,
# hdr_offset, and the sbuff/auto/eol arguments unchanged.
# NOTE(review): the two calls below differ only in the trailing
# Array(Float64,1) argument and look like the before/after sides of a diff;
# only the last expression is returned. Confirm which one belongs in the file.
DLMStore{T,S}(fill(SubString(sbuff,1,0), 1, ncols), Array(T, nrows-hdr_offset, ncols), nrows, ncols, 0, 0, hdr_offset, sbuff, auto, eol, Array(Float64,1))
DLMStore{T,S}(fill(SubString(sbuff,1,0), 1, ncols), Array(T, nrows-hdr_offset, ncols), nrows, ncols, 0, 0, hdr_offset, sbuff, auto, eol)
end

function store_cell{T,S<:AbstractString}(dlmstore::DLMStore{T,S}, row::Int, col::Int, quoted::Bool, startpos::Int, endpos::Int)
Expand All @@ -156,7 +155,6 @@ function store_cell{T,S<:AbstractString}(dlmstore::DLMStore{T,S}, row::Int, col:
lastrow = dlmstore.lastrow
cells::Array{T,2} = dlmstore.data
sbuff::S = dlmstore.sbuff
tmp64 = dlmstore.tmp64

endpos = prevind(sbuff, nextind(sbuff,endpos))
(endpos > 0) && ('\n' == dlmstore.eol) && ('\r' == char(sbuff[endpos])) && (endpos = prevind(sbuff, endpos))
Expand All @@ -183,9 +181,9 @@ function store_cell{T,S<:AbstractString}(dlmstore::DLMStore{T,S}, row::Int, col:

# fill data
if quoted && ('"' in sval)
fail = colval(replace(sval, r"\"\"", "\""), cells, drow, col, tmp64)
fail = colval(replace(sval, r"\"\"", "\""), cells, drow, col)
else
fail = colval(sval, cells, drow, col, tmp64)
fail = colval(sval, cells, drow, col)
end
if fail
((T <: Number) && dlmstore.auto) ? throw(TypeError(:store_cell, "", Any, T)) : error("file entry \"$(sval)\" cannot be converted to $T")
Expand All @@ -195,7 +193,7 @@ function store_cell{T,S<:AbstractString}(dlmstore::DLMStore{T,S}, row::Int, col:
dlmstore.lastcol = col
else
# fill header
colval((quoted && ('"' in sval)) ? replace(sval, r"\"\"", "\"") : sval, dlmstore.hdr, 1, col, tmp64)
colval((quoted && ('"' in sval)) ? replace(sval, r"\"\"", "\"") : sval, dlmstore.hdr, 1, col)
end

nothing
Expand Down Expand Up @@ -292,6 +290,8 @@ function dlm_fill(T::DataType, offarr::Vector{Vector{Int}}, dims::NTuple{2,Integ
idx = 1
offidx = 1
offsets = offarr[1]
row = 0
col = 0
try
dh = DLMStore(T, dims, has_header, sbuff, auto, eol)
while idx <= length(offsets)
Expand All @@ -308,17 +308,41 @@ function dlm_fill(T::DataType, offarr::Vector{Vector{Int}}, dims::NTuple{2,Integ
return result(dh)
catch ex
isa(ex, TypeError) && (ex.func == :store_cell) && (return dlm_fill(ex.expected, offarr, dims, has_header, sbuff, auto, eol))
rethrow(ex)
error("at row $row, column $col : $ex")
end
end

# Bool cells: store `true`/`false` in cells[row,col] when sval is exactly
# "true" or "false" and return false; return true for any other text.
# The return value is a failure flag (store_cell assigns it to `fail`).
colval{T<:Bool, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int) = ((sval=="true") && (cells[row,col]=true; return false); (sval=="false") && (cells[row,col]=false; return false); true)
# Integer cells: parse sval with parseint(T, sval), store the result in
# cells[row,col] and return false.
# NOTE(review): this method never returns true; if parseint raises on
# unparseable text (the Any method wraps the same call in try/catch, which
# suggests it does), the exception propagates to the caller instead of being
# reported through the failure flag -- confirm this is intended.
colval{T<:Integer, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int) = (cells[row,col] = parseint(T, sval); false)

# Floating-point and untyped (Any) cell parsers.
# tmp64 and tmp32 are one-element scratch arrays declared `local` here and
# shared by the methods defined in this block; `global colval` is declared so
# that the definitions below apply to the colval function outside the block.
begin
local tmp64::Array{Float64,1} = Array(Float64,1)
local tmp32::Array{Float32,1} = Array(Float32,1)
global colval

# Float64 / Float32 cells: when float64_isvalid / float32_isvalid accepts sval,
# copy element 1 of the scratch array into cells[row,col] and return false;
# otherwise return true (failure). Presumably the *_isvalid helpers write the
# parsed value into the scratch array -- their definitions are not shown here.
colval{T<:Float64, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int) = (float64_isvalid(sval, tmp64) ? (cells[row,col] = tmp64[1]; false) : true)
colval{T<:Float32, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int) = (float32_isvalid(sval, tmp32) ? (cells[row,col] = tmp32[1]; false) : true)

# Any cells: pick a representation for sval and store it; always returns false.
# For non-empty text the attempts are, in order:
#   1. parseint(sval)            -> stored as an integer
#   2. literal "true" / "false"  -> stored as a Bool (only tried when step 1 raised)
#   3. float64_isvalid           -> stored as the Float64 from tmp64[1]
# Empty text, and text that none of the above accept, is stored unchanged.
function colval{S<:AbstractString}(sval::S, cells::Array{Any,2}, row::Int, col::Int)
if !isempty(sval)
# check Integer
try
cells[row,col] = parseint(sval)
return false
catch
# not an integer: accept the two Bool literals before falling through
(sval == "true") && (cells[row,col] = true; return false)
(sval == "false") && (cells[row,col] = false; return false)
end
# check FloatingPoint
float64_isvalid(sval, tmp64) && (cells[row,col] = tmp64[1]; return false)
end
# fallback: keep the raw text
cells[row,col] = sval
false
end
end

# Remaining colval methods. Each returns false after storing a value in
# cells[row,col], or true when sval is not accepted for the element type.
#
# NOTE(review): the first six methods take an extra tmp64 scratch argument and
# look like the pre-change side of the diff, while the last two use the
# four-argument form -- confirm which set is meant to remain in the file.
#
# Five-argument forms:
#   Bool           - accepts exactly "true" / "false"
#   Number         - accepts text for which float64_isvalid(sval, tmp64) is true and stores tmp64[1]
#   AbstractString - stores sval unchanged, never fails
#   Any            - stores tmp64[1] when float64_isvalid accepts sval, otherwise sval; never fails
#   Char           - accepts only text with length(sval) == 1 and stores next(sval,1)[1]
#   other Array    - always returns true
colval{T<:Bool, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int, tmp64::Array{Float64,1}) = ((sval=="true") && (cells[row,col]=true; return false); (sval=="false") && (cells[row,col]=false; return false); true)
colval{T<:Number, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int, tmp64::Array{Float64,1}) = (float64_isvalid(sval, tmp64) ? ((cells[row,col] = tmp64[1]); false) : true)
colval{T<:AbstractString, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int, tmp64::Array{Float64,1}) = ((cells[row,col] = sval); false)
colval{S<:AbstractString}(sval::S, cells::Array{Any,2}, row::Int, col::Int, tmp64::Array{Float64,1}) = ((cells[row,col] = float64_isvalid(sval, tmp64) ? tmp64[1] : sval); false)
colval{T<:Char, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int, tmp64::Array{Float64,1}) = ((length(sval) == 1) ? ((cells[row,col] = next(sval,1)[1]); false) : true)
colval{S<:AbstractString}(sval::S, cells::Array, row::Int, col::Int, tmp64::Array{Float64,1}) = true
# Four-argument forms: string cells store sval unchanged and return false;
# the untyped-Array fallback always returns true.
colval{T<:AbstractString, S<:AbstractString}(sval::S, cells::Array{T,2}, row::Int, col::Int) = ((cells[row,col] = sval); false)
colval{S<:AbstractString}(sval::S, cells::Array, row::Int, col::Int) = true

dlm_parse(s::ASCIIString, eol::Char, dlm::Char, qchar::Char, cchar::Char, ign_adj_dlm::Bool, allow_quote::Bool, allow_comments::Bool, skipstart::Int, skipblanks::Bool, dh::DLMHandler) = begin
dlm_parse(s.data, uint8(uint32(eol)), uint8(uint32(dlm)), uint8(uint32(qchar)), uint8(uint32(cchar)),
Expand Down
4 changes: 4 additions & 0 deletions test/readdlm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,7 @@ let i18n_data = ["Origin (English)", "Name (English)", "Origin (Native)", "Name
@test (data, hdr) == readdlm(i18n_buff, '\t', header=true)
end

# Mixed-type 2x4 CSV read with element type Any. Expected cells (column-major
# in the reshape): 1 and 2000.1; the 38-digit integer as a Float64 and true;
# 0x3 and false; 10e6 and -10.34.
@test isequaldlm(readcsv(IOBuffer("1,22222222222222222222222222222222222222,0x3,10e6\n2000.1,true,false,-10.34"), Any),
reshape(Any[1,2000.1,float64(22222222222222222222222222222222222222),true,0x3,false,10e6,-10.34], 2, 4), Any)

# 19-digit values of both signs read with element type Int64 are expected to
# come back as exactly the same Int64 values.
@test isequaldlm(readcsv(IOBuffer("-9223355253176920979,9223355253176920979"), Int64), Int64[-9223355253176920979 9223355253176920979], Int64)

0 comments on commit 5ed0a6b

Please sign in to comment.