From 4bdf34fd8ced59626636255ab50eaba18c108374 Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>
Date: Tue, 3 Aug 2021 20:27:25 +0200
Subject: [PATCH 1/3] added fwrite sep=''

---
 R/fwrite.R            |  2 +-
 inst/tests/tests.Rraw |  8 ++++++++
 src/fwrite.c          | 12 ++++++------
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/R/fwrite.R b/R/fwrite.R
index 3f85ff1ea..c822b0567 100644
--- a/R/fwrite.R
+++ b/R/fwrite.R
@@ -42,7 +42,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
   }
   stopifnot(is.list(x),
     identical(quote,"auto") || isTRUEorFALSE(quote),
-    is.character(sep) && length(sep)==1L && nchar(sep) == 1L,
+    is.character(sep) && length(sep)==1L && (nchar(sep) == 1L || sep == ""),
     is.character(sep2) && length(sep2)==3L && nchar(sep2[2L])==1L,
     is.character(dec) && length(dec)==1L && nchar(dec) == 1L,
     dec != sep,  # sep2!=dec and sep2!=sep checked at C level when we know if list columns are present
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e30cd255d..20b2300c2 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -17810,3 +17810,11 @@ setDF(d)
 d[1:50, "a"] = d[51:100, "a"]
 setDT(d)
 test(2200, nrow(d[a==99]), 2L)
+
+# fwrite now allows sep="", #4817
+test(2201.1, fwrite(data.frame(a="id", b=letters[1:5], c=1:5), sep=""),
+            output = c("abc", paste0("id", letters[1:5], 1:5)))
+test(2201.2, fwrite(data.frame(a="id", b=1:1e2), sep=""), 
+            output = c("ab", paste0("id", 1:1e2)))
+test(2201.3, fwrite(data.table(a=c(NA, 2, 3.01), b=c('foo', NA, 'bar')), sep=""),
+             output=c("ab", "foo", "2", "3.01bar"))
diff --git a/src/fwrite.c b/src/fwrite.c
index 7bad0cd16..2ce3403ba 100644
--- a/src/fwrite.c
+++ b/src/fwrite.c
@@ -716,13 +716,13 @@ void fwriteMain(fwriteMainArgs args)
       if (args.doRowNames) {
         // Unusual: the extra blank column name when row_names are added as the first column
         if (doQuote!=0/*'auto'(NA) or true*/) { *ch++='"'; *ch++='"'; } // to match write.csv
-        *ch++ = sep;
+        if (sep != '\0') *ch++ = sep;
       }
       for (int j=0; j<args.ncol; j++) {
         writeString(args.colNames, j, &ch);
-        *ch++ = sep;
+        if (sep != '\0') *ch++ = sep;
       }
-      ch--; // backup over the last sep
+      if (sep != '\0') ch--; // backup over the last sep
       write_chars(args.eol, &ch);
     }
     if (f==-1) {
@@ -877,15 +877,15 @@ void fwriteMain(fwriteMainArgs args)
           } else {
             writeString(args.rowNames, i, &ch);
           }
-          *ch++=sep;
+          if (sep != '\0') *ch++=sep;
         }
         // Hot loop
         for (int j=0; j<args.ncol; j++) {
           (args.funs[args.whichFun[j]])(args.columns[j], i, &ch);
-          *ch++ = sep;
+          if (sep != '\0') *ch++ = sep;
         }
         // Tepid again (once at the end of each line)
-        ch--;  // backup onto the last sep after the last column. ncol>=1 because 0-columns was caught earlier.
+        if (sep != '\0') ch--;  // backup onto the last sep after the last column. ncol>=1 because 0-columns was caught earlier.
         write_chars(args.eol, &ch);  // overwrite last sep with eol instead
       }
       // compress buffer if gzip

From 810ab5151badb38c07eb4f003975c7d801d01f95 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 4 Aug 2021 17:45:09 -0600
Subject: [PATCH 2/3] added news item

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index f944a2ffb..b5b93e354 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -97,6 +97,8 @@
 
 15. New convenience function `%plike%` maps to `like(..., perl=TRUE)`, [#3702](https://github.com/Rdatatable/data.table/issues/3702). `%plike%` uses Perl-compatible regular expressions (PCRE) which extend TRE, and may be more efficient in some cases. Thanks @KyleHaynes for the suggestion and PR.
 
+16. `fwrite()` now accepts `sep=""`, which writes the fields of each row back-to-back with no separator between them, [#4817](https://github.com/Rdatatable/data.table/issues/4817). The motivating example is one where the result of `paste0()` needed to be written to file, but `paste0()` alone took 40 minutes because it had to construct a very large number of unique long strings in R's global character cache. Calling `fwrite(x, sep="")` on the unpasted columns avoids the `paste0()` call and saves those 40 minutes. Thanks to Jan Gorecki for the request, and Ben Schwen for the PR.
+
 ## BUG FIXES
 
 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.

From 5ec80e5b55a6e731a5227c71a458896d65da7572 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 4 Aug 2021 18:03:08 -0600
Subject: [PATCH 3/3] replace branches with ch+=sepLen

---
 src/fwrite.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/fwrite.c b/src/fwrite.c
index 2ce3403ba..f7f400318 100644
--- a/src/fwrite.c
+++ b/src/fwrite.c
@@ -35,6 +35,7 @@
 // Globals for this file only. Written once to hold parameters passed from R level.
 static const char *na;                 // by default "" or if set (not recommended) then usually "NA"
 static char sep;                       // comma in .csv files
+static int sepLen;                     // 0 when sep="" for #4817, otherwise 1
 static char sep2;                      // '|' within list columns. Used here to know if field should be quoted and in freadR.c to write sep2 in list columns
 static char dec;                       // the '.' in the number 3.1416. In Europe often: 3,1416
 static int8_t doQuote=INT8_MIN;        // whether to surround fields with double quote ". NA means 'auto' (default)
@@ -590,6 +591,7 @@ void fwriteMain(fwriteMainArgs args)
 
   na = args.na;
   sep = args.sep;
+  sepLen = sep=='\0' ? 0 : 1;
   sep2 = args.sep2;
   dec = args.dec;
   scipen = args.scipen;
@@ -635,10 +637,10 @@ void fwriteMain(fwriteMainArgs args)
   // could be console output) and writing column names to it.
 
   double t0 = wallclock();
-  size_t maxLineLen = eolLen + args.ncol*(2*(doQuote!=0) + 1/*sep*/);
+  size_t maxLineLen = eolLen + args.ncol*(2*(doQuote!=0) + sepLen);
   if (args.doRowNames) {
     maxLineLen += args.rowNames ? getMaxStringLen(args.rowNames, args.nrow)*2 : 1+(int)log10(args.nrow);  // the width of the row number
-    maxLineLen += 2*(doQuote!=0/*NA('auto') or true*/) + 1/*sep*/;
+    maxLineLen += 2*(doQuote!=0/*NA('auto') or true*/) + sepLen;
   }
   for (int j=0; j<args.ncol; j++) {
     int width = writerMaxLen[args.whichFun[j]];
@@ -703,7 +705,7 @@ void fwriteMain(fwriteMainArgs args)
   headerLen += yamlLen;
   if (args.colNames) {
     for (int j=0; j<args.ncol; j++) headerLen += getStringLen(args.colNames, j)*2;  // *2 in case quotes are escaped or doubled
-    headerLen += args.ncol*(1/*sep*/+(doQuote!=0)*2) + eolLen + 3;  // 3 in case doRowNames and doQuote (the first blank <<"",>> column name)
+    headerLen += args.ncol*(sepLen+(doQuote!=0)*2) + eolLen + 3;  // 3 in case doRowNames and doQuote (the first blank <<"",>> column name)
   }
   if (headerLen) {
     char *buff = malloc(headerLen);
@@ -716,13 +718,15 @@ void fwriteMain(fwriteMainArgs args)
       if (args.doRowNames) {
         // Unusual: the extra blank column name when row_names are added as the first column
         if (doQuote!=0/*'auto'(NA) or true*/) { *ch++='"'; *ch++='"'; } // to match write.csv
-        if (sep != '\0') *ch++ = sep;
+        *ch = sep;
+        ch += sepLen;
       }
       for (int j=0; j<args.ncol; j++) {
         writeString(args.colNames, j, &ch);
-        if (sep != '\0') *ch++ = sep;
+        *ch = sep;
+        ch += sepLen;
       }
-      if (sep != '\0') ch--; // backup over the last sep
+      ch -= sepLen; // backup over the last sep
       write_chars(args.eol, &ch);
     }
     if (f==-1) {
@@ -877,15 +881,17 @@ void fwriteMain(fwriteMainArgs args)
           } else {
             writeString(args.rowNames, i, &ch);
           }
-          if (sep != '\0') *ch++=sep;
+          *ch = sep;
+          ch += sepLen;
         }
         // Hot loop
         for (int j=0; j<args.ncol; j++) {
           (args.funs[args.whichFun[j]])(args.columns[j], i, &ch);
-          if (sep != '\0') *ch++ = sep;
+          *ch = sep;
+          ch += sepLen;
         }
         // Tepid again (once at the end of each line)
-        if (sep != '\0') ch--;  // backup onto the last sep after the last column. ncol>=1 because 0-columns was caught earlier.
+        ch -= sepLen;  // backup onto the last sep after the last column. ncol>=1 because 0-columns was caught earlier.
         write_chars(args.eol, &ch);  // overwrite last sep with eol instead
       }
       // compress buffer if gzip