diff --git a/src/context.jl b/src/context.jl index 047b4313..728a6543 100644 --- a/src/context.jl +++ b/src/context.jl @@ -649,9 +649,10 @@ end debug && println("single-threaded estimated rows = $origrowsguess, multi-threaded estimated rows = $rowsguess") debug && println("multi-threaded column types sampled as: $columns") else - @error "Multi-threaded parsing failed (are there newlines inside quoted fields?), falling back to single-threaded parsing" + # The following debug statement is doubled by a loud @warn or @error in parsefilechunk! + debug && println("multi-threaded parsing failed! Falling back to single thread, reinitializing column types.") reinitialize_column_type!(columns, types, names, stringtype, streaming) - threaded = false + threaded = false # the failure is signaled by having !ctx.threaded && ctx.ntasks > 1 end end if !threaded diff --git a/src/file.jl b/src/file.jl index da5af800..66632471 100644 --- a/src/file.jl +++ b/src/file.jl @@ -564,6 +564,15 @@ function parsefilechunk!(ctx::Context, pos, len, rowsguess, rowoffset, columns, rowsguess = newrowsguess end end + if !ctx.threaded && ctx.ntasks > 1 && !ctx.silencewarnings + # !ctx.threaded && ctx.ntasks > 1 indicates that multithreaded parsing failed. + # These messages echo the corresponding debug statement in the definition of ctx + if numwarnings[] > 0 + @warn "Multithreaded parsing failed and fell back to single-threaded parsing, check previous warnings for possible reasons." + else + @error "Multithreaded parsing failed and fell back to single-threaded parsing. This can happen if the input contains multi-line fields; otherwise, please report this issue." 
+ end + end end # done parsing (at least this chunk), so resize columns to final row count for col in columns diff --git a/test/testfiles.jl b/test/testfiles.jl index 01156cc2..e5b5f858 100644 --- a/test/testfiles.jl +++ b/test/testfiles.jl @@ -709,3 +709,16 @@ testfile("test_basic.csv", (types=Dict(2=>Float64),), (col1 = [1, 4, 7], col2 = [2.0, 5.0, 8.0], col3 = [3, 6, 9]); dir=Path(dir) ) + +# https://github.com/JuliaData/CSV.jl/pull/1099 +@info "The following test is expected to @error with \"Multithreaded parsing failed...\"" +testfile("test_multiline_field_errorwarning.csv", (ntasks=3,), + (20, 3), + NamedTuple{(:col1, :col2, :col3), Tuple{String3, String, Int}}, + let col1 = [String3("A$i") for i in 1:19], col2 = [".$i" for i in 1:19], col3 = collect(1:19) + insert!(col1, 14, String3("foo")) + insert!(col2, 14, "a field to thwart all heuristics\n ,,,\n, ,\n , ,,\n, ,,\n , ,,\n,,\n ,,\n , ,\n , ,\n ,, , ,\n , ,,,\n, ,,\n\n, , , ,\n , ,\n\n ,,,\n,,,\n,,,\n ,,,\n\n,\n,\n") + insert!(col3, 14, 0) + (; col1, col2, col3) + end +) \ No newline at end of file diff --git a/test/testfiles/test_multiline_field_errorwarning.csv b/test/testfiles/test_multiline_field_errorwarning.csv new file mode 100644 index 00000000..fb749e0f --- /dev/null +++ b/test/testfiles/test_multiline_field_errorwarning.csv @@ -0,0 +1,45 @@ +col1,col2,col3 +A1,.1,1 +A2,.2,2 +A3,.3,3 +A4,.4,4 +A5,.5,5 +A6,.6,6 +A7,.7,7 +A8,.8,8 +A9,.9,9 +A10,.10,10 +A11,.11,11 +A12,.12,12 +A13,.13,13 +foo,"a field to thwart all heuristics + ,,, +, , + , ,, +, ,, + , ,, +,, + ,, + , , + , , + ,, , , + , ,,, +, ,, + +, , , , + , , + + ,,, +,,, +,,, + ,,, + +, +, +",-0 +A14,.14,14 +A15,.15,15 +A16,.16,16 +A17,.17,17 +A18,.18,18 +A19,.19,19 \ No newline at end of file