From f30859600e98291be86b217b1b2aa3e95b7a6e14 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Sat, 15 Jun 2024 02:28:21 +0530 Subject: [PATCH 1/8] returning filepath from fwrite --- NEWS.md | 2 ++ R/fwrite.R | 6 +++--- inst/tests/tests.Rraw | 18 +++++++++--------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index b1e760b52..62f3f0665 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,8 @@ 14. `fread` loads `.bgz` files directly, [#5461](https://github.com/Rdatatable/data.table/issues/5461). Thanks to @TMRHarrison for the request with proposed fix, and Benjamin Schwendinger for the PR. +15. `fwrite` now returns the file path after writing data, [#5706](https://github.com/Rdatatable/data.table/issues/5706). This enhancement allows users to easily capture and utilize the file path for subsequent operations.Thanks to @Nj221102 for the PR. + ## BUG FIXES 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix. diff --git a/R/fwrite.R b/R/fwrite.R index ad92859f3..2c8d66e1d 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -84,11 +84,11 @@ fwrite = function(x, file="", append=FALSE, quote="auto", if (file.exists(file)) { suggested <- if (append) "" else gettextf("\nIf you intended to overwrite the file at %s with an empty one, please use file.remove first.", file) warningf("Input has no columns; doing nothing.%s", suggested) - return(invisible()) + return(invisible(file)) } else { warningf("Input has no columns; creating an empty file at '%s' and exiting.", file) file.create(file) - return(invisible()) + return(invisible(file)) } } yaml = if (!yaml) "" else { @@ -118,7 +118,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", .Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append, row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread, showProgress, is_gzip, bom, yaml, verbose, encoding) - invisible() + invisible(file) } haszlib = function() .Call(Cdt_has_zlib) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f49420b31..f87670e15 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9977,7 +9977,7 @@ test(1658.31, fwrite(ok_dt, qmethod=c("double", "double")), error="length(qmetho test(1658.32, fwrite(ok_dt, col.names="foobar"), error="isTRUEorFALSE(col.names)") # null data table (no columns) -test(1658.33, fwrite(data.table(NULL)), NULL, warning="Nothing to write") +test(1658.33, fwrite(data.table(NULL)), "", warning="Nothing to write") test(1658.34, fwrite(data.table(id=c("A","B","C"), v=c(1.1,0.0,9.9))), output="id,v\nA,1.1\nB,0\nC,9.9") @@ -10059,7 +10059,7 @@ test(1658.56, fwrite(data.table(exp(1) - pi*1i)), output='2.718[0-9]*-3.141[0-9] DT = data.table(a=1:3, b=list(1:4, c(3.14, 100e10), c(3i,4i,5i))) test(1658.57, fwrite(DT), output='0+3i|0+4i|0+5i') DT[ , b := c(1i, -1-1i, NA_complex_)] -test(1658.58, fwrite(DT), output='a,b\n1,0\\+1i\n2,-1-1i\n3,$') +test(1658.58, fwrite(DT), output='a,b\n1,0+1i\n2,-1-1i\n3,\n[1] ""') # more coverage test(1658.59, fwrite(data.table(a=list('a')), verbose=TRUE), @@ -10809,7 +10809,7 @@ if (test_bit64) { test(1731.1, class(DT[[1L]]), "integer64") test(1731.2, fwrite(DT,na="__NA__"), output=ans) f = tempfile() - test(1731.3, fwrite(DT, f, na="__NA__"), NULL) + test(1731.3, filepath <- fwrite(DT, f, na="__NA__"), filepath) test(1731.4, readLines(f), ans) unlink(f) ans[1] = "V1" # the field is unquoted under `quote=FALSE` @@ -10910,7 +10910,7 @@ test(1736.09, capture.output(fwrite(DT)), c("A,B", "foo,1|2|3", "\"ba|r\",1|2|3| test(1736.10, capture.output(fwrite(DT,quote=TRUE)), c("\"A\",\"B\"", "\"foo\",1|2|3", "\"ba|r\",1|2|3|4", "\"baz\",\"fo|o\"|\"ba,r\"|\"baz\"")) # any list of same length vector input -test(1737.1, fwrite(list()), NULL, warning="fwrite was passed an empty list of no columns") +test(1737.1, fwrite(list()), "", warning="fwrite was passed an empty list of no columns") test(1737.2, fwrite(list(1.2)), output="1.2") test(1737.3, fwrite(list(1.2,B="foo")), output=",B\n1.2,foo") test(1737.4, fwrite(list("A,Name"=1.2,B="fo,o")), output="\"A,Name\",B\n1.2,\"fo,o\"") @@ -13089,11 +13089,11 @@ unlink(f) #fwrite creates a file or does nothing, as appropriate, also #2898 DT = data.table(NULL) f = tempfile() -test(1922.7, fwrite(DT, f), NULL, warning = 'no columns; creating an empty file') +test(1922.7, filepath <- fwrite(DT, f), filepath, warning = 'no columns; creating an empty file') ## above test created a file; now test behavior when file exists -test(1922.8, fwrite(DT, f), NULL, warning = 'no columns; doing nothing.*file.remove') +test(1922.8, filepath <- fwrite(DT, f), filepath, warning = 'no columns; doing nothing.*file.remove') ## slightly different behavior if append = TRUE -test(1922.9, fwrite(DT, f, append = TRUE), NULL, warning = 'doing nothing.$') +test(1922.9, filepath <- fwrite(DT, f, append = TRUE), filepath, warning = 'doing nothing.$') # create index even if key present by setting attribute, #2883 DT = data.table(1:5, 1:5) @@ -13141,8 +13141,8 @@ test(1937.1, DT[A %between% c(B,B+1)], error='RHS has length().*Perhaps you mean test(1937.2, DT[A %between% B], error='length 2. The first') # that fwrite'ing a list to a file works (it broke in dev 1.11.5 and was caught before release), PR#3017 -test(1938.1, fwrite(list(1:3)), NULL, output="1\n2\n3") # never broke -test(1938.2, fwrite(list(1:3), file=f<-tempfile()), NULL) # just adding file= was what broke in dev just when x is list and not data.table|frame +test(1938.1, fwrite(list(1:3)), "", output="1\n2\n3") # never broke +test(1938.2, filepath <- fwrite(list(1:3), file=f<-tempfile()), filepath) # just adding file= was what broke in dev just when x is list and not data.table|frame test(1939.3, readLines(f), as.character(1:3)) unlink(f) From 470b276872ede56555d0ea4b471c78f54ceb10c1 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Sat, 15 Jun 2024 02:47:18 +0530 Subject: [PATCH 2/8] updated tests --- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f87670e15..83dadabaf 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10009,9 +10009,9 @@ if (!haszlib()) { } else { test(1658.41, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), output='a,b\n1,1\n2,2\n3,3') # compress ignored on console DT = data.table(a=rep(1:2,each=100), b=rep(1:4,each=25)) - test(1658.421, fwrite(DT, file=f1<-tempfile(fileext=".gz"), verbose=TRUE), NULL, + test(1658.421, filepath <- fwrite(DT, file=f1<-tempfile(fileext=".gz"), verbose=TRUE), filepath, output="args.nrow=200 args.ncol=2.*maxLineLen=5[12].*Writing 200 rows in 1 batches of 200 rows.*nth=1") # [12] for Windows where eolLen==2 - test(1658.422, fwrite(DT, file=f2<-tempfile()), NULL) + test(1658.422, filepath <- fwrite(DT, file=f2<-tempfile()), filepath) test(1658.423, file.info(f1)$size < file.info(f2)$size) # 74 < 804 (file.size() isn't available in R 3.1.0) if (test_R.utils) test(1658.43, fread(f1), DT) # use fread to decompress gz (works cross-platform) fwrite(DT, file=f3<-tempfile(), compress="gzip") # compress to filename not ending .gz @@ -10046,7 +10046,7 @@ unlink(c(f1, f2)) # compression error -5 due to only 3 bytes (bom) in first block; #3599 if (haszlib()) { DT = data.table(l=letters, n=1:26) - test(1658.53, fwrite(DT, file=f<-tempfile(fileext=".gz"), bom=TRUE, col.names=FALSE), NULL) + test(1658.53, filepath <- fwrite(DT, file=f<-tempfile(fileext=".gz"), bom=TRUE, col.names=FALSE), filepath) if (test_R.utils) test(1658.54, fread(f), setnames(DT,c("V1","V2"))) unlink(f) } From 10fcde8030ad42ade5c0e85c54c9e98894204a68 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Sat, 15 Jun 2024 02:56:18 +0530 Subject: [PATCH 3/8] updated tests --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 83dadabaf..69bf63c82 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13775,9 +13775,9 @@ if (.Platform$OS.type=="windows") { test(1966.4, list.files(path = pth, pattern = "\\.csv$"), f) unlink(c(fp, file.path(pth, "\u00c3\u00b6.csv"))) p = file.path(pth, "\u00fc"); dir.create(p); f = tempfile(tmpdir = p) - test(1966.5, fwrite(DT, enc2native(f)), NULL) + test(1966.5, filepath <- fwrite(DT, enc2native(f)), filepath) unlink(f) - test(1966.6, fwrite(DT, enc2utf8(f)), NULL) + test(1966.6, filepath <- fwrite(DT, enc2utf8(f)), filepath) unlink(p, recursive = TRUE) } From 94c0730a6e6d101861de8694e5512c14c9b6b27b Mon Sep 17 00:00:00 2001 From: nitish jha Date: Sat, 15 Jun 2024 09:29:12 +0530 Subject: [PATCH 4/8] updated news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 62f3f0665..ca9360220 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,7 +44,7 @@ 14. `fread` loads `.bgz` files directly, [#5461](https://github.com/Rdatatable/data.table/issues/5461). Thanks to @TMRHarrison for the request with proposed fix, and Benjamin Schwendinger for the PR. -15. `fwrite` now returns the file path after writing data, [#5706](https://github.com/Rdatatable/data.table/issues/5706). This enhancement allows users to easily capture and utilize the file path for subsequent operations.Thanks to @Nj221102 for the PR. +15. `fwrite` now returns the file path after writing data, [#5706](https://github.com/Rdatatable/data.table/issues/5706). This enhancement allows users to easily capture and utilize the file path for subsequent operations. Thanks to @eliocamp for the request and @Nj221102 for the PR. ## BUG FIXES From 7257b5a971c9658cf43b8cc0659e82473121fc21 Mon Sep 17 00:00:00 2001 From: Nitish Jha <151559388+Nj221102@users.noreply.github.com> Date: Sat, 15 Jun 2024 13:30:34 +0530 Subject: [PATCH 5/8] Update NEWS.md Co-authored-by: Michael Chirico --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index ca9360220..f47a9346a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,7 +44,7 @@ 14. `fread` loads `.bgz` files directly, [#5461](https://github.com/Rdatatable/data.table/issues/5461). Thanks to @TMRHarrison for the request with proposed fix, and Benjamin Schwendinger for the PR. -15. `fwrite` now returns the file path after writing data, [#5706](https://github.com/Rdatatable/data.table/issues/5706). This enhancement allows users to easily capture and utilize the file path for subsequent operations. Thanks to @eliocamp for the request and @Nj221102 for the PR. +15. `fwrite` now returns the file path instead of `NULL`, [#5706](https://github.com/Rdatatable/data.table/issues/5706). This can be useful to subsequent operations in scripts, especially some pipelines. Thanks to @eliocamp for the request and @Nj221102 for the PR. ## BUG FIXES From 68a7b1f8bf16d80742e101b8eedabbb72d65ed66 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Sat, 15 Jun 2024 14:09:22 +0530 Subject: [PATCH 6/8] updated tests --- inst/tests/tests.Rraw | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 69bf63c82..c11703e53 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10009,9 +10009,9 @@ if (!haszlib()) { } else { test(1658.41, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), output='a,b\n1,1\n2,2\n3,3') # compress ignored on console DT = data.table(a=rep(1:2,each=100), b=rep(1:4,each=25)) - test(1658.421, filepath <- fwrite(DT, file=f1<-tempfile(fileext=".gz"), verbose=TRUE), filepath, + test(1658.421, fwrite(DT, file=f1<-tempfile(fileext=".gz"), verbose=TRUE), f1, output="args.nrow=200 args.ncol=2.*maxLineLen=5[12].*Writing 200 rows in 1 batches of 200 rows.*nth=1") # [12] for Windows where eolLen==2 - test(1658.422, filepath <- fwrite(DT, file=f2<-tempfile()), filepath) + test(1658.422, fwrite(DT, file=f2<-tempfile()), f2) test(1658.423, file.info(f1)$size < file.info(f2)$size) # 74 < 804 (file.size() isn't available in R 3.1.0) if (test_R.utils) test(1658.43, fread(f1), DT) # use fread to decompress gz (works cross-platform) fwrite(DT, file=f3<-tempfile(), compress="gzip") # compress to filename not ending .gz @@ -10046,7 +10046,7 @@ unlink(c(f1, f2)) # compression error -5 due to only 3 bytes (bom) in first block; #3599 if (haszlib()) { DT = data.table(l=letters, n=1:26) - test(1658.53, filepath <- fwrite(DT, file=f<-tempfile(fileext=".gz"), bom=TRUE, col.names=FALSE), filepath) + test(1658.53, fwrite(DT, file=f<-tempfile(fileext=".gz"), bom=TRUE, col.names=FALSE), f) if (test_R.utils) test(1658.54, fread(f), setnames(DT,c("V1","V2"))) unlink(f) } @@ -10059,7 +10059,7 @@ test(1658.56, fwrite(data.table(exp(1) - pi*1i)), output='2.718[0-9]*-3.141[0-9] DT = data.table(a=1:3, b=list(1:4, c(3.14, 100e10), c(3i,4i,5i))) test(1658.57, fwrite(DT), output='0+3i|0+4i|0+5i') DT[ , b := c(1i, -1-1i, NA_complex_)] -test(1658.58, fwrite(DT), output='a,b\n1,0+1i\n2,-1-1i\n3,\n[1] ""') +test(1658.58, invisible(fwrite(DT)), output='a,b\n1,0\\+1i\n2,-1-1i\n3,$') # more coverage test(1658.59, fwrite(data.table(a=list('a')), verbose=TRUE), @@ -10809,7 +10809,7 @@ if (test_bit64) { test(1731.1, class(DT[[1L]]), "integer64") test(1731.2, fwrite(DT,na="__NA__"), output=ans) f = tempfile() - test(1731.3, filepath <- fwrite(DT, f, na="__NA__"), filepath) + test(1731.3, fwrite(DT, f, na="__NA__"), f) test(1731.4, readLines(f), ans) unlink(f) ans[1] = "V1" # the field is unquoted under `quote=FALSE` @@ -13089,11 +13089,11 @@ unlink(f) #fwrite creates a file or does nothing, as appropriate, also #2898 DT = data.table(NULL) f = tempfile() -test(1922.7, filepath <- fwrite(DT, f), filepath, warning = 'no columns; creating an empty file') +test(1922.7, fwrite(DT, f), f, warning = 'no columns; creating an empty file') ## above test created a file; now test behavior when file exists -test(1922.8, filepath <- fwrite(DT, f), filepath, warning = 'no columns; doing nothing.*file.remove') +test(1922.8, fwrite(DT, f), f, warning = 'no columns; doing nothing.*file.remove') ## slightly different behavior if append = TRUE -test(1922.9, filepath <- fwrite(DT, f, append = TRUE), filepath, warning = 'doing nothing.$') +test(1922.9, fwrite(DT, f, append = TRUE), f, warning = 'doing nothing.$') # create index even if key present by setting attribute, #2883 DT = data.table(1:5, 1:5) @@ -13142,7 +13142,7 @@ test(1937.2, DT[A %between% B], error='length 2. The first') # that fwrite'ing a list to a file works (it broke in dev 1.11.5 and was caught before release), PR#3017 test(1938.1, fwrite(list(1:3)), "", output="1\n2\n3") # never broke -test(1938.2, filepath <- fwrite(list(1:3), file=f<-tempfile()), filepath) # just adding file= was what broke in dev just when x is list and not data.table|frame +test(1938.2, fwrite(list(1:3), file=f<-tempfile()), f) # just adding file= was what broke in dev just when x is list and not data.table|frame test(1939.3, readLines(f), as.character(1:3)) unlink(f) @@ -13775,9 +13775,9 @@ if (.Platform$OS.type=="windows") { test(1966.4, list.files(path = pth, pattern = "\\.csv$"), f) unlink(c(fp, file.path(pth, "\u00c3\u00b6.csv"))) p = file.path(pth, "\u00fc"); dir.create(p); f = tempfile(tmpdir = p) - test(1966.5, filepath <- fwrite(DT, enc2native(f)), filepath) + test(1966.5, fwrite(DT,filepath<-enc2native(f)), filepath) unlink(f) - test(1966.6, filepath <- fwrite(DT, enc2utf8(f)), filepath) + test(1966.6, fwrite(DT, filepath<-enc2native(f)), filepath) unlink(p, recursive = TRUE) } From 9e13703a19611fbfe53637eeccbcecdcc61d1804 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Sat, 15 Jun 2024 14:18:35 +0530 Subject: [PATCH 7/8] updated tests --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c11703e53..a7854242f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10059,7 +10059,7 @@ test(1658.56, fwrite(data.table(exp(1) - pi*1i)), output='2.718[0-9]*-3.141[0-9] DT = data.table(a=1:3, b=list(1:4, c(3.14, 100e10), c(3i,4i,5i))) test(1658.57, fwrite(DT), output='0+3i|0+4i|0+5i') DT[ , b := c(1i, -1-1i, NA_complex_)] -test(1658.58, invisible(fwrite(DT)), output='a,b\n1,0\\+1i\n2,-1-1i\n3,$') +test(1658.58, invisible(fwrite(DT)), output='a,b\n1,0+1i\n2,-1-1i\n3,\n[1] ""') # more coverage test(1658.59, fwrite(data.table(a=list('a')), verbose=TRUE), From f0e5510d53cb80734893f95e7899828d656a24f3 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Sat, 15 Jun 2024 21:30:27 +0530 Subject: [PATCH 8/8] updated tests --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a7854242f..fe08657cb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10059,7 +10059,7 @@ test(1658.56, fwrite(data.table(exp(1) - pi*1i)), output='2.718[0-9]*-3.141[0-9] DT = data.table(a=1:3, b=list(1:4, c(3.14, 100e10), c(3i,4i,5i))) test(1658.57, fwrite(DT), output='0+3i|0+4i|0+5i') DT[ , b := c(1i, -1-1i, NA_complex_)] -test(1658.58, invisible(fwrite(DT)), output='a,b\n1,0+1i\n2,-1-1i\n3,\n[1] ""') +test(1658.58, invisible(fwrite(DT)), output='a,b\n1,0+1i\n2,-1-1i\n3,\n') # more coverage test(1658.59, fwrite(data.table(a=list('a')), verbose=TRUE),