Skip to content

Commit

Permalink
option 3
Browse files Browse the repository at this point in the history
  • Loading branch information
mattdowle committed Aug 10, 2021
1 parent b819857 commit f80ad26
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 32 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@

30. `fread(file=URL)` now works rather than error `does not exist or is non-readable`, [#4952](https://github.com/Rdatatable/data.table/issues/4952). `fread(URL)` and `fread(input=URL)` worked before and continue to work. Thanks to @pnacht for reporting and @ben-schwen for the PR.

31. `fwrite(DF, row.names=TRUE)` where `DF` has specific integer rownames (e.g. using `rownames(DF) <- c(10L,20L,30L)`) would incorrectly ignore the integer rowname and write the row number instead, [#4957](https://github.com/Rdatatable/data.table/issues/4957). Thanks to @dgarrimar for reporting and @ColeMiller1 for the PR. Further, when quote='auto' and the rownames are integers, they are no longer quoted.
31. `fwrite(DF, row.names=TRUE)` where `DF` has specific integer rownames (e.g. using `rownames(DF) <- c(10L,20L,30L)`) would incorrectly ignore the integer rownames and write the row numbers instead, [#4957](https://github.com/Rdatatable/data.table/issues/4957). Thanks to @dgarrimar for reporting and @ColeMiller1 for the PR. Further, when quote='auto' and the rownames are integers (either default or specific), they are no longer quoted.

## NOTES

Expand Down
3 changes: 1 addition & 2 deletions R/fwrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
nThread = as.integer(nThread)
# write.csv default is 'double' so fwrite follows suit. write.table's default is 'escape'
# validate arguments
rn = if (row.names) row.names(x) else NULL # allocate row.names in R to address integer row.names #4957
if (is.matrix(x)) { # coerce to data.table if input object is matrix
messagef("x being coerced from class: matrix to data.table")
x = as.data.table(x)
Expand Down Expand Up @@ -112,7 +111,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
file = enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078.
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append,
row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread,
showProgress, is_gzip, bom, yaml, verbose, encoding, rn)
showProgress, is_gzip, bom, yaml, verbose, encoding)
invisible()
}

38 changes: 21 additions & 17 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -10706,26 +10706,30 @@ test(1733.2, fwrite(data.table(c(1.2,-8.0,pi,67.99),1:4),dec=",",sep=";"),

# fwrite implied and actual row.names
DT = data.table(foo=1:3,bar=c(1.2,9.8,-6.0))
test(1734.1, capture.output(fwrite(DT,row.names=TRUE,quote=FALSE)),
capture.output(write.csv(DT,quote=FALSE)))
test(1734.2, capture.output(fwrite(DT,row.names=TRUE,quote=TRUE)),
capture.output(write.csv(DT)))
test(1734.3, fwrite(DT,row.names=TRUE,quote='auto'), # same other than 'foo' and 'bar' column names not quoted
output="\"\",foo,bar\n1,1,1.2\n2,2,9.8\n3,3,-6")
test(1734.01, capture.output(fwrite(DT,row.names=TRUE,quote=FALSE)),
capture.output(write.csv(DT,quote=FALSE)))
test(1734.02, capture.output(fwrite(DT,row.names=TRUE,quote=TRUE)),
capture.output(write.csv(DT)))
test(1734.03, fwrite(DT,row.names=TRUE,quote='auto'), # same other than 'foo' and 'bar' column names not quoted
output="\"\",foo,bar\n1,1,1.2\n2,2,9.8\n3,3,-6")
DF = as.data.frame(DT)
test(1734.4, capture.output(fwrite(DF,row.names=TRUE,quote=FALSE)),
capture.output(write.csv(DF,quote=FALSE)))
test(1734.5, capture.output(fwrite(DF,row.names=TRUE,quote=TRUE)),
capture.output(write.csv(DF)))
test(1734.04, capture.output(fwrite(DF,row.names=TRUE,quote=FALSE)),
capture.output(write.csv(DF,quote=FALSE)))
test(1734.05, capture.output(fwrite(DF,row.names=TRUE,quote=TRUE)),
capture.output(write.csv(DF)))
rownames(DF)[2] = "someName"
rownames(DF)[3] = "another"
test(1734.6, capture.output(fwrite(DF,row.names=TRUE,quote=FALSE)),
capture.output(write.csv(DF,quote=FALSE)))
test(1734.7, capture.output(fwrite(DF,row.names=TRUE,quote=TRUE)),
capture.output(write.csv(DF)))
rownames(DF) = c(10L, 20L, 30L) ## test for 4957
test(1734.8, capture.output(fwrite(DF, row.names = TRUE, quote = TRUE)),
capture.output(write.csv(DF)))
test(1734.06, capture.output(fwrite(DF,row.names=TRUE,quote=FALSE)),
capture.output(write.csv(DF,quote=FALSE)))
test(1734.07, capture.output(fwrite(DF,row.names=TRUE,quote=TRUE)),
capture.output(write.csv(DF)))
rownames(DF) = c(10L, -20L, 30L) ## test for #4957
test(1734.08, capture.output(fwrite(DF, row.names=TRUE, quote=TRUE)),
capture.output(write.csv(DF)))
test(1734.09, capture.output(fwrite(DF, row.names=TRUE, quote=FALSE)),
capture.output(write.csv(DF, quote=FALSE)))
test(1734.10, fwrite(DF, row.names=TRUE, quote='auto'),
output=c('"",foo,bar','10,1,1.2','-20,2,9.8','30,3,-6'))

# list columns and sep2
set.seed(1)
Expand Down
20 changes: 12 additions & 8 deletions src/fwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -623,8 +623,8 @@ void fwriteMain(fwriteMainArgs args)
DTPRINT(_("... "));
for (int j=args.ncol-10; j<args.ncol; j++) DTPRINT(_("%d "), args.whichFun[j]);
}
DTPRINT(_("\nargs.doRowNames=%d args.rowNames=%d doQuote=%d args.nrow=%"PRId64" args.ncol=%d eolLen=%d\n"),
args.doRowNames, args.rowNames, doQuote, args.nrow, args.ncol, eolLen);
DTPRINT(_("\nargs.doRowNames=%d args.rowNames=%p args.rowNameFun=%d doQuote=%d args.nrow=%"PRId64" args.ncol=%d eolLen=%d\n"),
args.doRowNames, args.rowNames, args.rowNameFun, doQuote, args.nrow, args.ncol, eolLen);
}

// Calculate upper bound for line length. Numbers use a fixed maximum (e.g. 12 for integer) while strings find the longest
Expand All @@ -639,8 +639,10 @@ void fwriteMain(fwriteMainArgs args)
double t0 = wallclock();
size_t maxLineLen = eolLen + args.ncol*(2*(doQuote!=0) + sepLen);
if (args.doRowNames) {
maxLineLen += args.rowNames ? getMaxStringLen(args.rowNames, args.nrow)*2 : 1+(int)log10(args.nrow); // the width of the row number
maxLineLen += 2*(doQuote!=0/*NA('auto') or true*/) + sepLen;
maxLineLen += args.rowNames==NULL ? 1+(int)log10(args.nrow) // the width of the row number
: (args.rowNameFun==WF_String ? getMaxStringLen(args.rowNames, args.nrow)*2 // *2 in case longest row name is all quotes (!) and all get escaped
: 11); // specific integer names could be MAX_INT 2147483647 (10 chars) even on a 5 row table, and data.frame allows negative integer rownames hence 11 for the sign
maxLineLen += 2/*possible quotes*/ + sepLen;
}
for (int j=0; j<args.ncol; j++) {
int width = writerMaxLen[args.whichFun[j]];
Expand Down Expand Up @@ -871,15 +873,17 @@ void fwriteMain(fwriteMainArgs args)
if (failed) continue; // Not break. Because we don't use #omp cancel yet.
int64_t end = ((args.nrow - start)<rowsPerBatch) ? args.nrow : start + rowsPerBatch;
for (int64_t i=start; i<end; i++) {
// Tepid starts here (once at beginning of each per line)
// Tepid starts here (once at beginning of each line)
if (args.doRowNames) {
if (args.rowNames==NULL) {
if (doQuote!=0/*NA'auto' or true*/) *ch++='"';
if (doQuote==1) *ch++='"';
int64_t rn = i+1;
writeInt64(&rn, 0, &ch);
if (doQuote!=0) *ch++='"';
if (doQuote==1) *ch++='"';
} else {
writeString(args.rowNames, i, &ch);
if (args.rowNameFun != WF_String && doQuote==1) *ch++='"';
(args.funs[args.rowNameFun])(args.rowNames, i, &ch); // #5098
if (args.rowNameFun != WF_String && doQuote==1) *ch++='"';
}
*ch = sep;
ch += sepLen;
Expand Down
3 changes: 2 additions & 1 deletion src/fwrite.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ typedef struct fwriteMainArgs

const void *colNames; // NULL means no header, otherwise ncol strings
bool doRowNames; // optional, likely false
const void *rowNames; // if doRowNames is true and rowNames is not NULL then they're used, otherwise row numbers are output.
const void *rowNames; // if doRowNames is true and rowNames is NULL then row numbers are output
uint8_t rowNameFun; // when rowNames is not NULL, which writer to use for them
char sep;
char sep2;
char dec;
Expand Down
19 changes: 16 additions & 3 deletions src/fwriteR.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,7 @@ SEXP fwriteR(
SEXP bom_Arg,
SEXP yaml_Arg,
SEXP verbose_Arg,
SEXP encoding_Arg,
SEXP rn_Arg // the actual row.names to be used #4957
SEXP encoding_Arg
)
{
if (!isNewList(DF)) error(_("fwrite must be passed an object of type list; e.g. data.frame, data.table"));
Expand Down Expand Up @@ -257,8 +256,22 @@ SEXP fwriteR(
// so we need a separate boolean flag as well as the row names should they exist (rare)
args.doRowNames = LOGICAL(rowNames_Arg)[0];
args.rowNames = NULL;
args.rowNameFun = 0;
if (args.doRowNames) {
args.rowNames = DATAPTR_RO(rn_Arg);
SEXP rn = PROTECT(getAttrib(DF, R_RowNamesSymbol));
protecti++;
if (isInteger(rn)) {
if (xlength(rn)!=2 || INTEGER(rn)[0]==NA_INTEGER) {

This comment has been minimized.

Copy link
@MichaelChirico

MichaelChirico Aug 10, 2021

Member

does a 0-row table have 0-length row names? if so [0] here is out of bounds. could it also happen of the next line is an error?

This comment has been minimized.

Copy link
@mattdowle

mattdowle Aug 10, 2021

Author Member

yes but the || short-circuits; i.e. INTEGER(rn)[0] only executes if xlength(rn)==2

This comment has been minimized.

Copy link
@MichaelChirico

MichaelChirico Aug 10, 2021

Member

ah yes, thanks. I shouldn't review code before fully waking up 🙃

// not R's default rownames c(NA,-nrow)
if (xlength(rn) != args.nrow)
error(_("input has specific integer rownames but their length (%"PRId64") != nrow (%"PRId64")"), xlength(rn), args.nrow); // # nocov
args.rowNames = INTEGER(rn);
args.rowNameFun = WF_Int32;
}
} else if (isString(rn)) {
args.rowNames = DATAPTR_RO(rn);
args.rowNameFun = WF_String;
}
}

args.sep = *CHAR(STRING_ELT(sep_Arg, 0)); // DO NOT DO: allow multichar separator (bad idea)
Expand Down

0 comments on commit f80ad26

Please sign in to comment.