Skip to content

Commit

Permalink
In CSV.jl#523, we have an interesting case of some rows having a random trailing delimiter.
Browse files Browse the repository at this point in the history
At first glance, it seems a rather corrupt file, but upon further investigation, it seems that with ignorerepeated=true, the final delimiter should be detected, and if a newline follows directly after, it should not signal an additional missing field, as it does with ignorerepeated=false. The bug here was that when we successfully matched a delimiter, we weren't then ignoring a newline that directly followed. (#39)
  • Loading branch information
quinnj authored Nov 5, 2019
1 parent 51fe70d commit e6dd7a7
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 0 deletions.
29 changes: 29 additions & 0 deletions src/Parsers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,20 @@ end
end
end
if matched
# if a newline is next, consume it as well
if b == UInt8('\n')
pos += 1
incr!(source)
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
elseif b == UInt8('\r')
pos += 1
incr!(source)
if !eof(source, pos, len) && peekbyte(source, pos) == UInt8('\n')
pos += 1
incr!(source)
end
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
end
code |= DELIMITED
@goto donedone
end
Expand All @@ -412,6 +426,21 @@ end
pos = checkdelim(source, pos, len, delim)
end
if matched
# if a newline is next, consume it as well
b = peekbyte(source, pos)
if b == UInt8('\n')
pos += 1
incr!(source)
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
elseif b == UInt8('\r')
pos += 1
incr!(source)
if !eof(source, pos, len) && peekbyte(source, pos) == UInt8('\n')
pos += 1
incr!(source)
end
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
end
code |= DELIMITED
@goto donedone
end
Expand Down
29 changes: 29 additions & 0 deletions src/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,20 @@
end
end
if matched
# if a newline is next, consume it as well
if b == UInt8('\n')
pos += 1
incr!(source)
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
elseif b == UInt8('\r')
pos += 1
incr!(source)
if !eof(source, pos, len) && peekbyte(source, pos) == UInt8('\n')
pos += 1
incr!(source)
end
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
end
code |= DELIMITED
@goto donedone
end
Expand All @@ -206,6 +220,21 @@
pos = checkdelim(source, pos, len, delim)
end
if matched
# if a newline is next, consume it as well
b = peekbyte(source, pos)
if b == UInt8('\n')
pos += 1
incr!(source)
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
elseif b == UInt8('\r')
pos += 1
incr!(source)
if !eof(source, pos, len) && peekbyte(source, pos) == UInt8('\n')
pos += 1
incr!(source)
end
code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
end
code |= DELIMITED
@goto donedone
end
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ testcases = [
# ignorerepeated
(str="1a,,", kwargs=(ignorerepeated=true,), x=1, code=(OK | DELIMITED | INVALID_DELIMITER), vpos=1, vlen=2, tlen=4),
(str="1a,,2", kwargs=(ignorerepeated=true,), x=1, code=(OK | DELIMITED | INVALID_DELIMITER), vpos=1, vlen=2, tlen=4),
(str="1,\n", kwargs=(ignorerepeated=true, delim=UInt8(',')), x=1, code=(OK | DELIMITED | NEWLINE | EOF), vpos=1, vlen=1, tlen=3),
];

for useio in (false, true)
Expand Down

0 comments on commit e6dd7a7

Please sign in to comment.