From f40ba1f29aa8878e9e514794f6651daeeebc3e3a Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Thu, 29 Oct 2020 11:09:58 -0600 Subject: [PATCH] Fix CSV.Chunks crashing due to finalized buffer Fixes #734. The issue here is that on windows, we aggressively finalize the file source buffer as a way to "close" the file, since windows is much more picky when trying to modify an mmapped file after parsing. i.e. windows complains if you try to modify a file and there's still an mmap reference to it somewhere, so at the end of CSV.File, we call `finalize(buf)` to ensure it gets unmapped. The problem, however, is that `CSV.Chunks` calls `CSV.File` multiple times on the same `buf`, providing different starting/ending byte positions to read in chunks. So naturally, after the first chunk was read, the buffer was getting finalized and then the 2nd chunk immediately crashed trying to read its chunk from a finalized buffer. --- src/chunks.jl | 2 +- src/file.jl | 3 ++- src/header.jl | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/chunks.jl b/src/chunks.jl index 1b57ee6e..e5b28360 100644 --- a/src/chunks.jl +++ b/src/chunks.jl @@ -81,7 +81,7 @@ end function Base.iterate(x::Chunks, i=1) i >= length(x.ranges) && return nothing - f = File(x.h; startingbyteposition=x.ranges[i], endingbyteposition=(x.ranges[i + 1] - (i != length(x.ranges))), threaded=x.threaded, typemap=x.typemap, tasks=x.tasks, debug=x.debug) + f = File(x.h; finalizebuffer=false, startingbyteposition=x.ranges[i], endingbyteposition=(x.ranges[i + 1] - (i != length(x.ranges))), threaded=x.threaded, typemap=x.typemap, tasks=x.tasks, debug=x.debug) return f, i + 1 end diff --git a/src/file.jl b/src/file.jl index 4b0bf14b..70f5aa5a 100644 --- a/src/file.jl +++ b/src/file.jl @@ -220,6 +220,7 @@ function File(source; end function File(h::Header; + finalizebuffer=true, startingbyteposition=nothing, endingbyteposition=nothing, limit::Union{Integer, Nothing}=nothing, @@ -344,7 +345,7 @@ function File(h::Header; lookup = Dict(k => v for (k, v) in zip(h.names, columns)) # for windows, it's particularly finicky about throwing errors when you try to modify an mmapped file # so we just want to make sure we finalize the input buffer so users don't run into surprises - if Sys.iswindows() && ncols > 0 && !lazystrings(flags[1]) + if finalizebuffer && Sys.iswindows() && ncols > 0 && !lazystrings(flags[1]) finalize(buf) end return File{something(threaded, false)}(h.name, h.names, types, finalrows, ncols, columns, lookup) diff --git a/src/header.jl b/src/header.jl index 3b0818f8..9a10ef3a 100644 --- a/src/header.jl +++ b/src/header.jl @@ -38,7 +38,7 @@ Checks whether a character or string is valid for use as a delimiter. If Throws an error if `delim` is invalid. """ function checkvaliddelim(delim) - delim != nothing && !isvaliddelim(delim) && + delim !== nothing && !isvaliddelim(delim) && throw(ArgumentError("invalid delim argument = '$(escape_string(string(delim)))', "* "the following delimiters are invalid: '\\r', '\\n', '\\0'")) end