From bec394519826529c02adedfdd601f04b45f859c2 Mon Sep 17 00:00:00 2001
From: landreev
Date: Wed, 7 Feb 2024 11:50:52 -0500
Subject: [PATCH] 8524 adding mechanism for storing tab. files with variable
 headers (#10282)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* "stored with header" flag #8524
* more changes for the streaming and redirect code. #8524
* disabling dynamically-generated varheader in the remaining storage drivers. #8524
* Ingest plugins (work in progress) #8524
* R ingest plugin (#8524)
* still some unaddressed @todo:s, but the branch should build and the unit tests should be passing. #8524
* work-in-progress, on the subsetting code in the download instance writer. #8524
* more work-in-progress changes. removing all the unused code from TabularSubsetGenerator, for clarity etc. #8524
* more bits and pieces #8524
* 2 more ingest plugins. #8524
* Integration tests. #8524
* typo #8524
* documenting the new setting. #8524
* a release note for the PR; also added the "storage quotas enabled" setting to the list of settings documented in the config guide while I was at it. #8524
* removed all the unused code from this class (lots of it) for clarity, etc. git history can be consulted if anyone is curious about what we used to do here. #8524
* removing @todo: that's no longer relevant #8524
* (cosmetic) defined the control constants used in the integration test. #8524
---
 ...4-storing-tabular-files-with-varheaders.md |    6 +
 .../source/installation/config.rst            |   22 +
 .../edu/harvard/iq/dataverse/DataTable.java   |   18 +
 .../dataverse/api/DownloadInstanceWriter.java |   78 +-
 .../harvard/iq/dataverse/api/TestIngest.java  |    2 +-
 .../iq/dataverse/dataaccess/FileAccessIO.java |    3 +-
 .../dataaccess/GlobusOverlayAccessIO.java     |    8 +-
 .../dataaccess/RemoteOverlayAccessIO.java     |    8 +-
 .../iq/dataverse/dataaccess/S3AccessIO.java   |    3 +-
 .../dataverse/dataaccess/SwiftAccessIO.java   |    3 +-
 .../dataaccess/TabularSubsetGenerator.java    | 1150 +---------------
 .../dataaccess/TabularSubsetInputStream.java  |  114 --
 .../export/DDIExportServiceBean.java          |   11 +
 .../dataverse/ingest/IngestServiceBean.java   |   64 +-
 .../tabulardata/TabularDataFileReader.java    |   26 +-
 .../impl/plugins/csv/CSVFileReader.java       |   24 +-
 .../impl/plugins/dta/DTAFileReader.java       |   11 +-
 .../impl/plugins/dta/NewDTAFileReader.java    |   19 +-
 .../impl/plugins/por/PORFileReader.java       |   13 +-
 .../impl/plugins/rdata/RDATAFileReader.java   |    4 +-
 .../impl/plugins/rdata/RTabFileParser.java    |   28 +-
 .../impl/plugins/sav/SAVFileReader.java       |   24 +-
 .../impl/plugins/xlsx/XLSXFileReader.java     |   11 +-
 .../settings/SettingsServiceBean.java         |    7 +-
 .../iq/dataverse/util/SystemConfig.java       |    8 +
 ...24-store-tabular-files-with-varheaders.sql |    1 +
 .../edu/harvard/iq/dataverse/api/FilesIT.java |  128 ++
 .../dataverse/ingest/IngestFrequencyTest.java |    2 +-
 .../impl/plugins/csv/CSVFileReaderTest.java   |   24 +-
 .../impl/plugins/dta/DTAFileReaderTest.java   |    2 +-
 .../plugins/dta/NewDTAFileReaderTest.java     |   14 +-
 31 files changed, 501 insertions(+), 1335 deletions(-)
 create mode 100644 doc/release-notes/8524-storing-tabular-files-with-varheaders.md
 delete mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java
 create mode 100644 src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql

diff --git a/doc/release-notes/8524-storing-tabular-files-with-varheaders.md b/doc/release-notes/8524-storing-tabular-files-with-varheaders.md
new file mode 100644
index 00000000000..f7034c846f6
--- /dev/null
+++ b/doc/release-notes/8524-storing-tabular-files-with-varheaders.md
@@ -0,0 +1,6 @@
+Tabular Data Ingest can now save the generated archival files with the list of variable names added as the first tab-delimited line. As the most significant effect of this feature,
+the Access API will be able to take advantage of Direct Download for tabular files saved with these headers on S3, since the header line no longer has to be generated and added to the streamed content on the fly.
+
+This behavior is controlled by the new setting `:StoreIngestedTabularFilesWithVarHeaders`. It is false by default, preserving the legacy behavior. When enabled, Dataverse will be able to handle both newly ingested files and any already-existing legacy files stored without these headers, transparently to the user. E.g., the Access API will continue delivering tab-delimited files **with** this header line, whether it needs to add the header dynamically for legacy files or can read complete files directly from storage for the ones stored with it.
+
+An API for converting existing legacy tabular files will be added separately. [this line will need to be changed if we have time to add said API before 6.2 is released].
\ No newline at end of file
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index a7d7905ca4a..c233e594fa7 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -4151,3 +4151,25 @@ A true/false (default) option determining whether the dataset datafile table dis
 .. _supported MicroProfile Config API source: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html
+
+.. _:UseStorageQuotas:
+
+:UseStorageQuotas
++++++++++++++++++
+
+Enables storage use quotas in collections. See the :doc:`/api/native-api` for details.
+
+
+.. _:StoreIngestedTabularFilesWithVarHeaders:
+
+:StoreIngestedTabularFilesWithVarHeaders
+++++++++++++++++++++++++++++++++++++++++
+
+With this setting enabled, tabular files produced during Ingest will
+be stored with the list of variable names added as the first
+tab-delimited line. As the most significant effect of this feature,
+the Access API will be able to take advantage of Direct Download for
+tabular files saved with these headers on S3, since the header line no
+longer has to be generated and added to the streamed file on the fly.
+
+The setting is ``false`` by default, preserving the legacy behavior.
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTable.java b/src/main/java/edu/harvard/iq/dataverse/DataTable.java
index a17d8c65138..95f3aed0f40 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataTable.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataTable.java
@@ -112,6 +112,16 @@ public DataTable() {
     @Column( nullable = true )
     private String originalFileName;
+
+    /**
+     * The physical tab-delimited file is stored with the list of variable
+     * names saved as the 1st line. This means that we do not need to generate
+     * this line on the fly. (Also means that the direct download mechanism
+     * can be used for this file!)
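+     *
+     * For illustration only (hypothetical variable names and values), a file
+     * stored with this flag set would begin like:
+     * <pre>
+     * id	name	income
+     * 1	"Alice"	42000.0
+     * 2	"Bob"	39500.0
+     * </pre>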
+ */ + @Column(nullable = false) + private boolean storedWithVariableHeader = false; + /* * Getter and Setter methods: */ @@ -206,6 +216,14 @@ public void setOriginalFileName(String originalFileName) { this.originalFileName = originalFileName; } + public boolean isStoredWithVariableHeader() { + return storedWithVariableHeader; + } + + public void setStoredWithVariableHeader(boolean storedWithVariableHeader) { + this.storedWithVariableHeader = storedWithVariableHeader; + } + /* * Custom overrides for hashCode(), equals() and toString() methods: */ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index bcb8799ec9e..89b22b76a7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -22,7 +22,6 @@ import jakarta.ws.rs.ext.Provider; import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.dataaccess.*; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.engine.command.Command; @@ -104,8 +103,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] String auxiliaryTag = null; String auxiliaryType = null; String auxiliaryFileName = null; + // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): + if (storageIO.downloadRedirectEnabled()) { // Even if the above is true, there are a few cases where a @@ -159,7 +160,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } } else if (dataFile.isTabularData()) { - // Many separate special cases here. + // Many separate special cases here. if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { @@ -180,12 +181,26 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirectSupported = false; } } - } else if (!di.getConversionParam().equals("noVarHeader")) { - // This is a subset request - can't do. + } else if (di.getConversionParam().equals("noVarHeader")) { + // This will work just fine, if the tab. file is + // stored without the var. header. Throw "unavailable" + // exception otherwise. + // @todo: should we actually drop support for this "noVarHeader" flag? + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + throw new ServiceUnavailableException(); + } + // ... defaults to redirectSupported = true + } else { + // This must be a subset request then - can't do. + redirectSupported = false; + } + } else { + // "straight" download of the full tab-delimited file. + // can redirect, but only if stored with the variable + // header already added: + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { redirectSupported = false; } - } else { - redirectSupported = false; } } } @@ -247,11 +262,16 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // finally, issue the redirect: Response response = Response.seeOther(redirect_uri).build(); logger.fine("Issuing redirect to the file location."); + // Yes, this throws an exception. It's not an exception + // as in, "bummer, something went wrong". This is how a + // redirect is produced here! 
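+                    // (RedirectionException is the jakarta.ws.rs exception type
+                    // for 3xx responses; the JAX-RS runtime turns it into the
+                    // 303 "See Other" response built above, sending the client
+                    // directly to the storage location.)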
throw new RedirectionException(response); } throw new ServiceUnavailableException(); } + // Past this point, this is a locally served/streamed download + if (di.getConversionParam() != null) { // Image Thumbnail and Tabular data conversion: // NOTE: only supported on local files, as of 4.0.2! @@ -285,9 +305,14 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // request any tabular-specific services. if (di.getConversionParam().equals("noVarHeader")) { - logger.fine("tabular data with no var header requested"); - storageIO.setNoVarHeader(Boolean.TRUE); - storageIO.setVarHeader(null); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular data with no var header requested"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); + } else { + logger.fine("can't serve request for tabular data without varheader, since stored with it"); + throw new ServiceUnavailableException(); + } } else if (di.getConversionParam().equals("format")) { // Conversions, and downloads of "stored originals" are // now supported on all DataFiles for which StorageIO @@ -329,11 +354,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) { logger.fine("adding variable id " + variable.getId() + " to the list."); variablePositionIndex.add(variable.getFileOrder()); - if (subsetVariableHeader == null) { - subsetVariableHeader = variable.getName(); - } else { - subsetVariableHeader = subsetVariableHeader.concat("\t"); - subsetVariableHeader = subsetVariableHeader.concat(variable.getName()); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + subsetVariableHeader = subsetVariableHeader == null + ? variable.getName() + : subsetVariableHeader.concat("\t" + variable.getName()); } } else { logger.warning("variable does not belong to this data file."); @@ -346,7 +370,17 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] try { File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp"); TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator(); - tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t"); + + long numberOfLines = dataFile.getDataTable().getCaseQuantity(); + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + numberOfLines++; + } + + tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), + tempSubsetFile.getAbsolutePath(), + variablePositionIndex, + numberOfLines, + "\t"); if (tempSubsetFile.exists()) { FileInputStream subsetStream = new FileInputStream(tempSubsetFile); @@ -354,8 +388,11 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize); logger.fine("successfully created subset output stream."); - subsetVariableHeader = subsetVariableHeader.concat("\n"); - subsetStreamIO.setVarHeader(subsetVariableHeader); + + if (subsetVariableHeader != null) { + subsetVariableHeader = subsetVariableHeader.concat("\n"); + subsetStreamIO.setVarHeader(subsetVariableHeader); + } String tabularFileName = storageIO.getFileName(); @@ -380,8 +417,13 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } else { logger.fine("empty list of extra arguments."); } + // end of tab. 
data subset case + } else if (dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular file stored with the var header included, no need to generate it on the fly"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); } - } + } // end of tab. data file case if (storageIO == null) { //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java index 05ba150df8e..add43ea2091 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java @@ -100,7 +100,7 @@ public String datafile(@QueryParam("fileName") String fileName, @QueryParam("fil TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { output = output.concat("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); return output; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index f2a1312a150..26637ec5742 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -120,7 +120,8 @@ public void open (DataAccessOption... options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 7a6809cb2ff..733daaf1328 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -450,8 +450,12 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(retrieveSizeFromMedia()); } // Only applies for the S3 Connector case (where we could have run an ingest) - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 1616bfabf96..bca70259cb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -124,8 +124,12 @@ public void open(DataAccessOption... 
options) throws IOException { logger.fine("Setting size"); this.setSize(retrieveSizeFromMedia()); } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 8afc365417e..c2143bd4789 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -225,7 +225,8 @@ public void open(DataAccessOption... options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 105a60ab418..717f46ffd60 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -142,7 +142,8 @@ public void open(DataAccessOption... 
options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index 782f7f3a52d..c369010c8cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -60,305 +60,26 @@ public class TabularSubsetGenerator implements SubsetGenerator { - private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); + private static Logger logger = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); - private static int COLUMN_TYPE_STRING = 1; - private static int COLUMN_TYPE_LONG = 2; - private static int COLUMN_TYPE_DOUBLE = 3; - private static int COLUMN_TYPE_FLOAT = 4; - - private static int MAX_COLUMN_BUFFER = 8192; - - private FileChannel fileChannel = null; - - private int varcount; - private int casecount; - private int subsetcount; - - private byte[][] columnEntries = null; - - - private ByteBuffer[] columnByteBuffers; - private int[] columnBufferSizes; - private int[] columnBufferOffsets; - - private long[] columnStartOffsets; - private long[] columnTotalOffsets; - private long[] columnTotalLengths; - - public TabularSubsetGenerator() { - - } - - public TabularSubsetGenerator (DataFile datafile, List variables) throws IOException { - if (!datafile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - setVarCount(datafile.getDataTable().getVarQuantity().intValue()); - setCaseCount(datafile.getDataTable().getCaseQuantity().intValue()); - - - - StorageIO dataAccess = datafile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); + //private static int MAX_COLUMN_BUFFER = 8192; - File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount()); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount()); - - fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); - - if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) { - throw new IOException("Illegal number of variables in the subset request"); - } - - subsetcount = variables.size(); - columnTotalOffsets = new long[subsetcount]; - columnTotalLengths = new long[subsetcount]; - columnByteBuffers = new ByteBuffer[subsetcount]; - - + public TabularSubsetGenerator() { - if (subsetcount == 1) { - if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - dbgLog.fine("single variable subset; setting fileChannel position to "+extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - 
columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder());
-            columnTotalOffsets[0] = 0;
-        } else {
-            columnEntries = new byte[subsetcount][];
-
-            columnBufferSizes = new int[subsetcount];
-            columnBufferOffsets = new int[subsetcount];
-            columnStartOffsets = new long[subsetcount];
-
-            int i = 0;
-            for (DataVariable var : variables) {
-                if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) {
-                    throw new IOException("Variable in the subset request does not belong to the datafile.");
-                }
-                columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER);
-                columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder());
-                columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder());
-                if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) {
-                    columnByteBuffers[i].limit((int)columnTotalLengths[i]);
-                }
-                fileChannel.position(columnStartOffsets[i]);
-                columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]);
-                columnBufferOffsets[i] = 0;
-                columnTotalOffsets[i] = columnBufferSizes[i];
-                i++;
-            }
-        }
-    }
-
-    private int getVarCount() {
-        return varcount;
-    }
-
-    private void setVarCount(int varcount) {
-        this.varcount = varcount;
-    }
-
-    private int getCaseCount() {
-        return casecount;
-    }
-
-    private void setCaseCount(int casecount) {
-        this.casecount = casecount;
-    }
-
-
-    /*
-     * Note that this method operates on the *absolute* column number, i.e.
-     * the number of the physical column in the tabular file. This is stored
-     * in DataVariable.FileOrder.
-     * This "column number" should not be confused with the number of column
-     * in the subset request; a user can request any number of variable
-     * columns, in an order that doesn't have to follow the physical order
-     * of the columns in the file.
-     */
-    private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException {
-        if (columnEndOffsets == null || columnEndOffsets.length <= column) {
-            throw new IOException("Offsets table not initialized; or column out of bounds.");
-        }
-        long columnOffset;
-
-        if (column > 0) {
-            columnOffset = columnEndOffsets[column - 1];
-        } else {
-            columnOffset = getVarCount() * 8;
-        }
-        return columnOffset;
-    }
-
-    /*
-     * See the comment for the method above.
+    /**
+     * This class used to be much more complex. There were methods for subsetting
+     * from fixed-width field files, including methods that used optimized, "90 deg.
+     * rotated" versions of such files (i.e., you create a *column-wise* copy of your
+     * data file in which the columns are stored sequentially, plus a table of byte
+     * offsets of each column; individual variable columns can then be read cheaply,
+     * at the expense of doubling the storage size of your tabular data files).
+     * These methods were not used, so they were deleted (in Jan. 2024, prior to 6.2).
+     * Please consult git history if you are interested in looking at that code.
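+     * What remains are the simple stream-based methods: subsetFile(), plus the
+     * static subset*Vector helpers, which scan the tab-delimited file line by
+     * line and now accept a skipHeader flag telling them to skip the variable
+     * name header line when the file is stored with one.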
*/ - private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnLength; - - if (column > 0) { - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnLength = columnEndOffsets[0] - varcount * 8; - } - - return columnLength; - } - - - private void bufferMoreColumnBytes(int column) throws IOException { - if (columnTotalOffsets[column] >= columnTotalLengths[column]) { - throw new IOException("attempt to buffer bytes past the column boundary"); - } - fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]); - - columnByteBuffers[column].clear(); - if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) { - dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes"); - columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column])); - } - columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]); - dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column); - columnBufferOffsets[column] = 0; - columnTotalOffsets[column] += columnBufferSizes[column]; - } - - public byte[] readColumnEntryBytes(int column) { - return readColumnEntryBytes(column, true); - } - - - public byte[] readColumnEntryBytes(int column, boolean addTabs) { - byte[] leftover = null; - byte[] ret = null; - - if (columnBufferOffsets[column] >= columnBufferSizes[column]) { - try { - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - } catch (IOException ioe) { - return null; - } - } - - int byteindex = columnBufferOffsets[column]; - try { - while (columnByteBuffers[column].array()[byteindex] != '\n') { - byteindex++; - if (byteindex == columnBufferSizes[column]) { - // save the leftover: - if (leftover == null) { - leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]); - } else { - byte[] merged = new byte[leftover.length + columnBufferSizes[column]]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]); - leftover = merged; - merged = null; - } - // read more bytes: - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - byteindex = 0; - } - } - - // presumably, we have found our '\n': - if (leftover == null) { - ret = new byte[byteindex - columnBufferOffsets[column] + 1]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1); - } else { - ret = new byte[leftover.length + byteindex + 1]; - System.arraycopy(leftover, 0, ret, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1); - } - - } catch (IOException ioe) { - return null; - } - - columnBufferOffsets[column] = (byteindex + 1); - - if (column < columnBufferOffsets.length - 1) { - ret[ret.length - 1] = '\t'; - } - return ret; - } - - public int readSingleColumnSubset(byte[] buffer) throws IOException { - if (columnTotalOffsets[0] == columnTotalLengths[0]) { - return -1; - } - 
- if (columnByteBuffers[0] == null) { - dbgLog.fine("allocating single column subset buffer."); - columnByteBuffers[0] = ByteBuffer.allocate(buffer.length); - } - - int bytesread = fileChannel.read(columnByteBuffers[0]); - dbgLog.fine("single column subset: read "+bytesread+" bytes."); - if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) { - bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]); - } - System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread); - - columnTotalOffsets[0] += bytesread; - columnByteBuffers[0].clear(); - return bytesread > 0 ? bytesread : -1; - } - - - public byte[] readSubsetLineBytes() throws IOException { - byte[] ret = null; - int total = 0; - for (int i = 0; i < subsetcount; i++) { - columnEntries[i] = readColumnEntryBytes(i); - if (columnEntries[i] == null) { - throw new IOException("Failed to read subset line entry"); - } - total += columnEntries[i].length; - } - - ret = new byte[total]; - int offset = 0; - for (int i = 0; i < subsetcount; i++) { - System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length); - offset += columnEntries[i].length; - } - dbgLog.fine("line: "+new String(ret)); - return ret; - } - - - public void close() { - if (fileChannel != null) { - try { - fileChannel.close(); - } catch (IOException ioe) { - // don't care. - } - } - } - public void subsetFile(String infile, String outfile, List columns, Long numCases) { subsetFile(infile, outfile, columns, numCases, "\t"); } @@ -411,11 +132,15 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo * files, OK to use on small files: */ - public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { + public static Double[] subsetDoubleVector(InputStream in, int column, int numCases, boolean skipHeader) { Double[] retVector = new Double[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -463,11 +188,15 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static Float[] subsetFloatVector(InputStream in, int column, int numCases) { + public static Float[] subsetFloatVector(InputStream in, int column, int numCases, boolean skipHeader) { Float[] retVector = new Float[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -513,11 +242,15 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. 
*/ - public static Long[] subsetLongVector(InputStream in, int column, int numCases) { + public static Long[] subsetLongVector(InputStream in, int column, int numCases, boolean skipHeader) { Long[] retVector = new Long[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -549,11 +282,15 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases) * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static String[] subsetStringVector(InputStream in, int column, int numCases) { + public static String[] subsetStringVector(InputStream in, int column, int numCases, boolean skipHeader) { String[] retVector = new String[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -621,819 +358,10 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas } - /* - * Straightforward method for subsetting a tab-delimited data file, extracting - * all the columns representing continuous variables and returning them as - * a 2-dimensional array of Doubles; - * Inefficient on large files, OK to use on small ones. - */ - public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException { - Double[][] retVector = new Double[columns.size()][numCases]; - try (Scanner scanner = new Scanner(in)) { - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - int j = 0; - for (Integer i : columns) { - try { - // TODO: verify that NaN and +-Inf are going to be - // handled correctly here! -- L.A. - // NO, "+-Inf" is not handled correctly; see the - // comment further down below. - retVector[j][caseIndex] = new Double(line[i]); - } catch (NumberFormatException ex) { - retVector[j][caseIndex] = null; // missing value - } - j++; - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); - } - tailIndex++; - } - - } - return retVector; - - } - - public String[] subsetStringVector(DataFile datafile, int column) throws IOException { - return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException { - return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(DataFile datafile, int column) throws IOException { - return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG); - } - - // Float methods are temporary; - // In normal operations we'll be treating all the floating point types as - // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest - // tests. -- L.A. 
- - public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException { - return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT); - } - - public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG); - } - - public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT); - } - - public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException { - if (!dataFile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - int varcount = dataFile.getDataTable().getVarQuantity().intValue(); - int casecount = dataFile.getDataTable().getCaseQuantity().intValue(); - - if (column >= varcount) { - throw new IOException("Column "+column+" is out of bounds."); - } - - StorageIO dataAccess = dataFile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); - - if (columntype == COLUMN_TYPE_STRING) { - String filename = dataFile.getFileMetadata().getLabel(); - if (filename != null) { - filename = filename.replaceFirst("^_", ""); - Integer fnumvalue = null; - try { - fnumvalue = new Integer(filename); - } catch (Exception ex){ - fnumvalue = null; - } - if (fnumvalue != null) { - //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) { - if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) { - if (!(fnumvalue.intValue() == 60007 - || fnumvalue.intValue() == 59997 - || fnumvalue.intValue() == 60015 - || fnumvalue.intValue() == 59948 - || fnumvalue.intValue() == 60012 - || fnumvalue.intValue() == 52585 - || fnumvalue.intValue() == 60005 - || fnumvalue.intValue() == 60002 - || fnumvalue.intValue() == 59954 - || fnumvalue.intValue() == 60008 - || fnumvalue.intValue() == 54972 - || fnumvalue.intValue() == 55010 - || fnumvalue.intValue() == 54996 - || fnumvalue.intValue() == 53527 - || fnumvalue.intValue() == 53546 - || fnumvalue.intValue() == 55002 - || fnumvalue.intValue() == 55006 - || fnumvalue.intValue() == 54998 - || fnumvalue.intValue() == 52552 - // SPSS/SAV cases with similar issue - compat mode must be disabled - //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16... - || fnumvalue.intValue() == 54618 // another SAV file, with long strings... - || fnumvalue.intValue() == 54619 // [same] - || fnumvalue.intValue() == 57983 - || fnumvalue.intValue() == 58262 - || fnumvalue.intValue() == 58288 - || fnumvalue.intValue() == 58656 - || fnumvalue.intValue() == 59144 - // || fnumvalue.intValue() == 69626 [nope!] 
- )) { - dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;"); - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true); - } - } - } - } + private static void skipFirstLine(Scanner scanner) { + if (!scanner.hasNext()) { + throw new RuntimeException("Failed to read the variable name header line from the tab-delimited file!"); } - - return subsetObjectVector(tabfile, column, varcount, casecount, columntype); - } - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException { - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false); - } - - - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException { - - Object[] retVector = null; - - boolean isString = false; - boolean isDouble = false; - boolean isLong = false; - boolean isFloat = false; - - //Locale loc = new Locale("en", "US"); - - if (columntype == COLUMN_TYPE_STRING) { - isString = true; - retVector = new String[casecount]; - } else if (columntype == COLUMN_TYPE_DOUBLE) { - isDouble = true; - retVector = new Double[casecount]; - } else if (columntype == COLUMN_TYPE_LONG) { - isLong = true; - retVector = new Long[casecount]; - } else if (columntype == COLUMN_TYPE_FLOAT){ - isFloat = true; - retVector = new Float[casecount]; - } else { - throw new IOException("Unsupported column type: "+columntype); - } - - File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount); - long columnOffset = 0; - long columnLength = 0; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnOffset = varcount * 8; - columnLength = columnEndOffsets[0] - varcount * 8; - } - int caseindex = 0; - - try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), - StandardOpenOption.READ))) { - fc.position(columnOffset); - int MAX_COLUMN_BUFFER = 8192; - - ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - - if (columnLength < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength)); - } - - long bytesRead = 0; - long bytesReadTotal = 0; - - int byteoffset = 0; - byte[] leftover = null; - - while (bytesReadTotal < columnLength) { - bytesRead = fc.read(in); - byte[] columnBytes = in.array(); - int bytecount = 0; - - while (bytecount < bytesRead) { - if (columnBytes[bytecount] == '\n') { - /* - String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - - if (leftover != null) { - String leftoverString = new String (leftover, "UTF8"); - token = leftoverString + token; - leftover = null; - } - */ - /* - * Note that the way I was doing it at first - above - - * was not quite the correct way - because I was creating UTF8 - * strings from the leftover bytes, and the bytes in the - * current buffer *separately*; which means, if a multi-byte - * UTF8 character got split in the middle between one buffer - * and the next, both chunks of it would become junk - * characters, on each side! - * The correct way of doing it, of course, is to create a - * merged byte buffer, and then turn it into a UTF8 string. - * -- L.A. 
4.0 - */ - String token = null; - - if (leftover == null) { - token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8"); - } else { - byte[] merged = new byte[leftover.length + bytecount - byteoffset]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset); - token = new String(merged, "UTF8"); - leftover = null; - merged = null; - } - - if (isString) { - if ("".equals(token)) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! - retVector[caseindex] = null; - } else { - // Strip the outer quotes: - token = token.replaceFirst("^\\\"", ""); - token = token.replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! - - String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 - - token = StringUtils.join(splitTokens, '\\'); - - // "compatibility mode" - a hack, to be able to produce - // unfs identical to those produced by the "early" - // unf5 jar; will be removed in production 4.0. - // -- L.A. (TODO: ...) - if (compatmode && !"".equals(token)) { - if (token.length() > 128) { - if ("".equals(token.trim())) { - // don't ask... - token = token.substring(0, 129); - } else { - token = token.substring(0, 128); - // token = String.format(loc, "%.128s", token); - token = token.trim(); - // dbgLog.info("formatted and trimmed: "+token); - } - } else { - if ("".equals(token.trim())) { - // again, don't ask; - // - this replicates some bugginness - // that happens inside unf5; - token = "null"; - } else { - token = token.trim(); - } - } - } - - retVector[caseindex] = token; - } - } else if (isDouble) { - try { - // TODO: verify that NaN and +-Inf are - // handled correctly here! -- L.A. - // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. 
- if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Double(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Double"); - - retVector[caseindex] = null; // missing value - // TODO: ? - } - } else if (isLong) { - try { - retVector[caseindex] = new Long(token); - } catch (NumberFormatException ex) { - retVector[caseindex] = null; // assume missing value - } - } else if (isFloat) { - try { - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Float(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Float"); - retVector[caseindex] = null; // assume missing value (TODO: ?) - } - } - caseindex++; - - if (bytecount == bytesRead - 1) { - byteoffset = 0; - } else { - byteoffset = bytecount + 1; - } - } else { - if (bytecount == bytesRead - 1) { - // We've reached the end of the buffer; - // This means we'll save whatever unused bytes left in - // it - i.e., the bytes between the last new line - // encountered and the end - in the leftover buffer. - - // *EXCEPT*, there may be a case of a very long String - // that is actually longer than MAX_COLUMN_BUFFER, in - // which case it is possible that we've read through - // an entire buffer of bytes without finding any - // new lines... in this case we may need to add this - // entire byte buffer to an already existing leftover - // buffer! 
- if (leftover == null) { - leftover = new byte[(int) bytesRead - byteoffset]; - System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset); - } else { - if (byteoffset != 0) { - throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); - } - byte[] merged = new byte[leftover.length + (int) bytesRead]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead); - // leftover = null; - leftover = merged; - merged = null; - } - byteoffset = 0; - - } - } - bytecount++; - } - - bytesReadTotal += bytesRead; - in.clear(); - if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength - bytesReadTotal)); - } - } - - } - - if (caseindex != casecount) { - throw new IOException("Faile to read "+casecount+" tokens for column "+column); - //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+"."); - } - - return retVector; - } - - private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { - long[] byteOffsets = new long[varcount]; - - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) { - - byte[] offsetHeader = new byte[varcount * 8]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - // System.out.println(byteOffsets[varindex]); - } - - } - - return byteOffsets; - } - - private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - File rotatedImageFile = new File(rotatedImageFileName); - if (rotatedImageFile.exists()) { - //System.out.println("Image already exists!"); - return rotatedImageFile; - } - - return generateRotatedImage(tabfile, varcount, casecount); - - } - - private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException { - // TODO: throw exceptions if bad file, zero varcount, etc. ... - - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - - int MAX_OUTPUT_STREAMS = 32; - int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now? - int MAX_COLUMN_BUFFER = 8 * 1024; - - // offsetHeader will contain the byte offsets of the individual column - // vectors in the final rotated image file - byte[] offsetHeader = new byte[varcount * 8]; - int[] bufferedSizes = new int[varcount]; - long[] cachedfileSizes = new long[varcount]; - File[] columnTempFiles = new File[varcount]; - - for (int i = 0; i < varcount; i++) { - bufferedSizes[i] = 0; - cachedfileSizes[i] = 0; - } - - // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is - // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?) 
- - byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER]; - - // read the tab-delimited file: - - try (FileInputStream tabfileStream = new FileInputStream(tabfile); - Scanner scanner = new Scanner(tabfileStream)) { - scanner.useDelimiter("\\n"); - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // TODO: throw an exception if there are fewer tab-delimited - // tokens than the number of variables specified. - String token = ""; - int tokensize = 0; - for (int varindex = 0; varindex < varcount; varindex++) { - // TODO: figure out the safest way to convert strings to - // bytes here. Is it going to be safer to use getBytes("UTF8")? - // we are already making the assumption that the values - // in the tab file are in UTF8. -- L.A. - token = line[varindex] + "\n"; - tokensize = token.getBytes().length; - if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) { - // fill the buffer and dump its contents into the temp file: - // (do note that there may be *several* MAX_COLUMN_BUFFERs - // worth of bytes in the token!) - - int tokenoffset = 0; - - if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { - tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex]; - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); - } // (otherwise the buffer is already full, and we should - // simply dump it into the temp file, without adding any - // extra bytes to it) - - File bufferTempFile = columnTempFiles[varindex]; - if (bufferTempFile == null) { - bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); - columnTempFiles[varindex] = bufferTempFile; - } - - // *append* the contents of the buffer to the end of the - // temp file, if already exists: - try (BufferedOutputStream outputStream = new BufferedOutputStream( - new FileOutputStream(bufferTempFile, true))) { - outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - - // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into - // the temp file, for as long as there's more than MAX_COLUMN_BUFFER - // bytes left in the token: - - while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { - outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - tokenoffset += MAX_COLUMN_BUFFER; - } - - } - - // buffer the remaining bytes and reset the buffered - // byte counter: - - System.arraycopy(token.getBytes(), - tokenoffset, - bufferedColumns[varindex], - 0, - tokensize - tokenoffset); - - bufferedSizes[varindex] = tokensize - tokenoffset; - - } else { - // continue buffering - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); - bufferedSizes[varindex] += tokensize; - } - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - } - - // OK, we've created the individual byte vectors of the tab file columns; - // they may be partially saved in temp files and/or in memory. - // We now need to go through all these buffers and create the final - // rotated image file. - - try (BufferedOutputStream finalOut = new BufferedOutputStream( - new FileOutputStream(new File(rotatedImageFileName)))) { - - // but first we should create the offset header and write it out into - // the final file; because it should be at the head, doh! 
- - long columnOffset = varcount * 8; - // (this is the offset of the first column vector; it is equal to the - // size of the offset header, i.e. varcount * 8 bytes) - - for (int varindex = 0; varindex < varcount; varindex++) { - long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; - columnOffset += totalColumnBytes; - // totalColumnBytes; - byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); - System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); - } - - finalOut.write(offsetHeader, 0, varcount * 8); - - for (int varindex = 0; varindex < varcount; varindex++) { - long cachedBytesRead = 0; - - // check if there is a cached temp file: - - File cachedTempFile = columnTempFiles[varindex]; - if (cachedTempFile != null) { - byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; - try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) { - int readlen = 0; - while ((readlen = cachedIn.read(cachedBytes)) > -1) { - finalOut.write(cachedBytes, 0, readlen); - cachedBytesRead += readlen; - } - } - - // delete the temp file: - cachedTempFile.delete(); - - } - - if (cachedBytesRead != cachedfileSizes[varindex]) { - throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ - cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); - } - - // then check if there are any bytes buffered for this column: - - if (bufferedSizes[varindex] > 0) { - finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); - } - - } - } - - return new File(rotatedImageFileName); - - } - - /* - * Test method for taking a "rotated" image, and reversing it, reassembling - * all the columns in the original order. Which should result in a file - * byte-for-byte identical file to the original tab-delimited version. - * - * (do note that this method is not efficiently implemented; it's only - * being used for experiments so far, to confirm the accuracy of the - * accuracy of generateRotatedImage(). It should not be used for any - * practical means in the application!) 
- */ - private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { - // open the file, read in the offset header: - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) { - byte[] offsetHeader = new byte[varcount * 8]; - long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); - } - - String [][] reversedMatrix = new String[casecount][varcount]; - - long offset = varcount * 8; - byte[] columnBytes; - - for (int varindex = 0; varindex < varcount; varindex++) { - long columnLength = byteOffsets[varindex] - offset; - - - - columnBytes = new byte[(int)columnLength]; - readlen = rotfileStream.read(columnBytes); - - if (readlen != columnLength) { - throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); - } - /* - String columnString = new String(columnBytes); - //System.out.print(columnString); - String[] values = columnString.split("\n", -1); - - if (values.length < casecount) { - throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - reversedMatrix[caseindex][varindex] = values[caseindex]; - }*/ - - int bytecount = 0; - int byteoffset = 0; - int caseindex = 0; - //System.out.println("generating value vector for column "+varindex); - while (bytecount < columnLength) { - if (columnBytes[bytecount] == '\n') { - String token = new String(columnBytes, byteoffset, bytecount-byteoffset); - reversedMatrix[caseindex++][varindex] = token; - byteoffset = bytecount + 1; - } - bytecount++; - } - - if (caseindex != casecount) { - throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); - } - offset = byteOffsets[varindex]; - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - for (int varindex = 0; varindex < varcount; varindex++) { - System.out.print(reversedMatrix[caseindex][varindex]); - if (varindex < varcount-1) { - System.out.print("\t"); - } else { - System.out.print("\n"); - } - } - } - - } - - - } - - /** - * main() method, for testing - * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type - * make sure the CLASSPATH contains ... 
- * - */ - - public static void main(String[] args) { - - String tabFileName = args[0]; - int varcount = new Integer(args[1]).intValue(); - int casecount = new Integer(args[2]).intValue(); - int column = new Integer(args[3]).intValue(); - String type = args[4]; - - File tabFile = new File(tabFileName); - File rotatedImageFile = null; - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - - /* - try { - rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount); - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - */ - - //System.out.println("\nFinished generating \"rotated\" column image file."); - - //System.out.println("\nOffsets:"); - - MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN); - String FORMAT_IEEE754 = "%+#.15e"; - - try { - //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount); - //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - if ("string".equals(type)) { - String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - System.out.println(columns[i]); - } - } else { - - Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - if (columns[i] != null) { - BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext); - System.out.println(String.format(FORMAT_IEEE754, outBigDecimal)); - } else { - System.out.println("NA"); - } - //System.out.println(columns[i]); - } - } - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - } -} - - + scanner.next(); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java deleted file mode 100644 index 89e033353c1..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ - -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.datavariable.DataVariable; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.logging.Logger; - -/** - * - * @author Leonid Andreev - */ -public class TabularSubsetInputStream extends InputStream { - private static final Logger logger = Logger.getLogger(TabularSubsetInputStream.class.getCanonicalName()); - - private TabularSubsetGenerator subsetGenerator = null; - private int numberOfSubsetVariables; - private int numberOfObservations; - private int numberOfObservationsRead = 0; - private byte[] leftoverBytes = null; - - public TabularSubsetInputStream(DataFile datafile, List variables) throws IOException { - if (datafile == null) { - throw new IOException("Null datafile in subset request"); - } - if (!datafile.isTabularData()) { - throw new IOException("Subset requested on a non-tabular data file"); - } - numberOfObservations = datafile.getDataTable().getCaseQuantity().intValue(); - - if (variables == null || variables.size() < 1) { - throw new IOException("Null or empty list of variables in subset request."); - } - numberOfSubsetVariables = variables.size(); - subsetGenerator = new TabularSubsetGenerator(datafile, variables); - - } - - //@Override - public int read() throws IOException { - throw new IOException("read() method not implemented; do not use."); - } - - //@Override - public int read(byte[] b) throws IOException { - // TODO: - // Move this code into TabularSubsetGenerator - logger.fine("subset input stream: read request, on a "+b.length+" byte buffer;"); - - if (numberOfSubsetVariables == 1) { - logger.fine("calling the single variable subset read method"); - return subsetGenerator.readSingleColumnSubset(b); - } - - int bytesread = 0; - byte [] linebuffer; - - // do we have a leftover? - if (leftoverBytes != null) { - if (leftoverBytes.length < b.length) { - System.arraycopy(leftoverBytes, 0, b, 0, leftoverBytes.length); - bytesread = leftoverBytes.length; - leftoverBytes = null; - - } else { - // shouldn't really happen... unless it's a very large subset, - // or a very long string, etc. - System.arraycopy(leftoverBytes, 0, b, 0, b.length); - byte[] tmp = new byte[leftoverBytes.length - b.length]; - System.arraycopy(leftoverBytes, b.length, tmp, 0, leftoverBytes.length - b.length); - leftoverBytes = tmp; - tmp = null; - return b.length; - } - } - - while (bytesread < b.length && numberOfObservationsRead < numberOfObservations) { - linebuffer = subsetGenerator.readSubsetLineBytes(); - numberOfObservationsRead++; - - if (bytesread + linebuffer.length < b.length) { - // copy linebuffer into the return buffer: - System.arraycopy(linebuffer, 0, b, bytesread, linebuffer.length); - bytesread += linebuffer.length; - } else { - System.arraycopy(linebuffer, 0, b, bytesread, b.length - bytesread); - // save the leftover; - if (bytesread + linebuffer.length > b.length) { - leftoverBytes = new byte[bytesread + linebuffer.length - b.length]; - System.arraycopy(linebuffer, b.length - bytesread, leftoverBytes, 0, bytesread + linebuffer.length - b.length); - } - return b.length; - } - } - - // and this means we've reached the end of the tab file! - - return bytesread > 0 ? 
bytesread : -1; - } - - //@Override - public void close() { - if (subsetGenerator != null) { - subsetGenerator.close(); - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java index 5119b4b96c7..edd01ae98a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java @@ -545,6 +545,16 @@ private void createDataFileDDI(XMLStreamWriter xmlw, Set excludedFieldSe List<DataVariable> vars = variableService.findByDataTableId(dt.getId()); if (checkField("catgry", excludedFieldSet, includedFieldSet)) { if (checkIsWithoutFrequencies(vars)) { + // @todo: the method called here to calculate frequencies + // when they are missing from the database (for whatever + // reasons) subsets the physical tab-delimited file and + // calculates them in real time. This is potentially a very + // expensive operation. Let's make sure that, when we do this, we + // save the resulting frequencies in the database, so that + // we don't have to do this again. Also, let's double-check + // whether the "checkIsWithoutFrequencies()" method is doing + // the right thing - as it appears to return true when there + // are no categorical variables in the DataTable (?) calculateFrequencies(df, vars); } } @@ -580,6 +590,7 @@ private boolean checkIsWithoutFrequencies(List<DataVariable> vars) { private void calculateFrequencies(DataFile df, List<DataVariable> vars) { + // @todo: see the comment in the part of the code that calls this method try { DataConverter dc = new DataConverter(); File tabFile = dc.downloadFromStorageIO(df.getStorageIO()); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 233f746fb17..9bacafd173f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -726,27 +726,17 @@ public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFil } public void produceContinuousSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { - - /* - // quick, but memory-inefficient way: - // - this method just loads the entire file-worth of continuous vectors - // into a Double[][] matrix.
- //Double[][] variableVectors = subsetContinuousVectors(dataFile); - //calculateContinuousSummaryStatistics(dataFile, variableVectors); - - // A more sophisticated way: this subsets one column at a time, using - // the new optimized subsetting that does not have to read any extra - // bytes from the file to extract the column: - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - */ for (int i = 0; i < dataFile.getDataTable().getVarQuantity(); i++) { if (dataFile.getDataTable().getDataVariables().get(i).isIntervalContinuous()) { logger.fine("subsetting continuous vector"); if ("float".equals(dataFile.getDataTable().getDataVariables().get(i).getFormat())) { - Float[] variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Float[] variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Float vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -754,7 +744,11 @@ public void produceContinuousSummaryStatistics(DataFile dataFile, File generated calculateUNF(dataFile, i, variableVector); variableVector = null; } else { - Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Double vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -776,7 +770,11 @@ public void produceDiscreteNumericSummaryStatistics(DataFile dataFile, File gene && dataFile.getDataTable().getDataVariables().get(i).isTypeNumeric()) { logger.fine("subsetting discrete-numeric vector"); - Long[] variableVector = TabularSubsetGenerator.subsetLongVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Long[] variableVector = TabularSubsetGenerator.subsetLongVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); // We are discussing calculating the same summary stats for // all numerics (the same kind of sumstats that we've been calculating // for numeric continuous type) -- L.A. Jul. 
2014 @@ -810,7 +808,11 @@ public void produceCharacterSummaryStatistics(DataFile dataFile, File generatedT if (dataFile.getDataTable().getDataVariables().get(i).isTypeCharacter()) { logger.fine("subsetting character vector"); - String[] variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + String[] variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); //calculateCharacterSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: logger.fine("Calculating UNF on a String vector"); @@ -828,20 +830,29 @@ public static void produceFrequencyStatistics(DataFile dataFile, File generatedT produceFrequencies(generatedTabularFile, vars); } - public static void produceFrequencies( File generatedTabularFile, List<DataVariable> vars) throws IOException { + public static void produceFrequencies(File generatedTabularFile, List<DataVariable> vars) throws IOException { for (int i = 0; i < vars.size(); i++) { Collection<VariableCategory> cats = vars.get(i).getCategories(); int caseQuantity = vars.get(i).getDataTable().getCaseQuantity().intValue(); boolean isNumeric = vars.get(i).isTypeNumeric(); + boolean skipVariableHeaderLine = vars.get(i).getDataTable().isStoredWithVariableHeader(); Object[] variableVector = null; if (cats.size() > 0) { if (isNumeric) { - variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } else { - variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } if (variableVector != null) { Hashtable freq = calculateFrequency(variableVector); @@ -923,6 +934,7 @@ public boolean ingestAsTabular(Long datafile_id) { DataFile dataFile = fileService.find(datafile_id); boolean ingestSuccessful = false; boolean forceTypeCheck = false; + boolean storingWithVariableHeader = systemConfig.isStoringIngestedFilesWithHeaders(); // Never attempt to ingest a file that's already ingested!
if (dataFile.isTabularData()) { @@ -1024,11 +1036,7 @@ public boolean ingestAsTabular(Long datafile_id) { TabularDataIngest tabDataIngest = null; try { - if (additionalData != null) { - tabDataIngest = ingestPlugin.read(inputStream, additionalData); - } else { - tabDataIngest = ingestPlugin.read(inputStream, null); - } + tabDataIngest = ingestPlugin.read(inputStream, storingWithVariableHeader, additionalData); } catch (IOException ingestEx) { dataFile.SetIngestProblem(); FileUtil.createIngestFailureReport(dataFile, ingestEx.getMessage()); @@ -1081,6 +1089,7 @@ public boolean ingestAsTabular(Long datafile_id) { dataFile.setDataTable(tabDataIngest.getDataTable()); tabDataIngest.getDataTable().setDataFile(dataFile); tabDataIngest.getDataTable().setOriginalFileName(originalFileName); + dataFile.getDataTable().setStoredWithVariableHeader(storingWithVariableHeader); try { produceSummaryStatistics(dataFile, tabFile); @@ -1172,6 +1181,7 @@ public boolean ingestAsTabular(Long datafile_id) { // Replace contents of the file with the tab-delimited data produced: dataAccess.savePath(Paths.get(tabFile.getAbsolutePath())); + // Reset the file size: dataFile.setFilesize(dataAccess.getSize()); @@ -2297,7 +2307,7 @@ public static void main(String[] args) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { System.err.println("Caught an exception trying to ingest file "+file+"."); System.exit(1); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java index 223b171dfb5..0f23a3d9781 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java @@ -20,10 +20,13 @@ package edu.harvard.iq.dataverse.ingest.tabulardata; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.*; //import edu.harvard.iq.dataverse.ingest.plugin.metadata.*; import java.io.*; import static java.lang.System.*; +import java.util.Iterator; +import java.util.List; import java.util.regex.Matcher; /** @@ -98,7 +101,7 @@ public void setDataLanguageEncoding(String dataLanguageEncoding) { * * @throws java.io.IOException if a reading error occurs. 
*/ - public abstract TabularDataIngest read(BufferedInputStream stream, File dataFile) + public abstract TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException; @@ -176,5 +179,26 @@ protected String escapeCharacterString(String rawString) { return escapedString; } + + protected String generateVariableHeader(List<DataVariable> dvs) { + String varHeader = null; + + if (dvs != null) { + Iterator<DataVariable> iter = dvs.iterator(); + DataVariable dv; + + if (iter.hasNext()) { + dv = iter.next(); + varHeader = dv.getName(); + } + + while (iter.hasNext()) { + dv = iter.next(); + varHeader = varHeader + "\t" + dv.getName(); + } + } + + return varHeader; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java index 57f76df3802..f8816ababb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java @@ -110,7 +110,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); if (stream == null) { @@ -124,7 +124,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath()); - int lineCount = readFile(localBufferedReader, dataTable, tabFileWriter); + int lineCount = readFile(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); logger.fine("Tab file produced: " + tabFileDestination.getAbsolutePath()); @@ -136,14 +136,17 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws } - public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException { + public int readFile(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter finalOut) throws IOException { List<DataVariable> variableList = new ArrayList<>(); CSVParser parser = new CSVParser(csvReader, inFormat.withHeader()); Map<String, Integer> headers = parser.getHeaderMap(); int i = 0; + String variableNameHeader = null; + for (String varName : headers.keySet()) { + // @todo: is .keySet() guaranteed to return the names in the right order? if (varName == null || varName.isEmpty()) { // TODO: // Add a sensible variable name validation algorithm. @@ -158,6 +161,13 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f dv.setTypeCharacter(); dv.setIntervalDiscrete(); + + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ?
varName + : variableNameHeader.concat("\t" + varName); + } + i++; } @@ -342,6 +352,14 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; + + // Save the variable name header, if requested + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException("failed to generate the Variable Names header"); + } + finalOut.println(variableNameHeader); + } for (CSVRecord record : parser) { if (!record.isConsistent()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java index 2dec701592e..73818f8fb62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java @@ -505,7 +505,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { dbgLog.info("***** DTAFileReader: read() start *****"); if (dataFile != null) { @@ -519,7 +519,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws if (releaseNumber!=104) { decodeExpansionFields(stream); } - decodeData(stream); + decodeData(stream, storeWithVariableHeader); decodeValueLabels(stream); ingesteddata.setDataTable(dataTable); @@ -1665,7 +1665,7 @@ private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOEx dbgLog.fine("parseValueLabelsRelease108(): end"); } - private void decodeData(BufferedInputStream stream) throws IOException { + private void decodeData(BufferedInputStream stream, boolean saveWithVariableHeader) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); @@ -1719,6 +1719,11 @@ private void decodeData(BufferedInputStream stream) throws IOException { BUT, this needs to be reviewed/confirmed etc! */ //String[][] dateFormat = new String[nvar][nobs]; + + // add the variable header here, if needed + if (saveWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java index 22581834676..53607d541de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java @@ -339,7 +339,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { logger.fine("NewDTAFileReader: read() start"); // shit ton of diagnostics (still) needed here!! -- L.A. 
@@ -363,7 +363,13 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws // "characteristics" - STATA-proprietary information // (we are skipping it) readCharacteristics(dataReader); - readData(dataReader); + + String variableHeaderLine = null; + + if (storeWithVariableHeader) { + variableHeaderLine = generateVariableHeader(dataTable.getDataVariables()); + } + readData(dataReader, variableHeaderLine); // (potentially) large, (potentially) non-ASCII character strings // saved outside the section, and referenced @@ -707,7 +713,7 @@ private void readCharacteristics(DataReader reader) throws IOException { } - private void readData(DataReader reader) throws IOException { + private void readData(DataReader reader, String variableHeaderLine) throws IOException { logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_data()); logger.fine("readData(): start"); reader.readOpeningTag(TAG_DATA); @@ -731,6 +737,11 @@ private void readData(DataReader reader) throws IOException { FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile); PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + // add the variable header here, if needed + if (variableHeaderLine != null) { + pwout.println(variableHeaderLine); + } + logger.fine("Beginning to read data stream."); for (int i = 0; i < nobs; i++) { @@ -999,6 +1010,8 @@ private void readSTRLs(DataReader reader) throws IOException { int nobs = dataTable.getCaseQuantity().intValue(); String[] line; + + //@todo: adjust for the case of storing the file with the variable header for (int obsindex = 0; obsindex < nobs; obsindex++) { if (scanner.hasNext()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java index c90b0ea6950..2ee966c3e31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java @@ -180,7 +180,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File additionalData) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File additionalData) throws IOException{ dbgLog.fine("PORFileReader: read() start"); if (additionalData != null) { @@ -226,7 +226,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t headerId = "8S"; } - decode(headerId, bfReader); + decode(headerId, bfReader, storeWithVariableHeader); // for last iteration @@ -382,7 +382,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t return ingesteddata; } - private void decode(String headerId, BufferedReader reader) throws IOException{ + private void decode(String headerId, BufferedReader reader, boolean storeWithVariableHeader) throws IOException{ if (headerId.equals("1")) decodeProductName(reader); else if (headerId.equals("2")) decodeLicensee(reader); else if (headerId.equals("3")) decodeFileLabel(reader); @@ -398,7 +398,7 @@ private void decode(String headerId, BufferedReader reader) throws IOException{ else if (headerId.equals("C")) decodeVariableLabel(reader); else if (headerId.equals("D")) decodeValueLabel(reader); else if (headerId.equals("E")) decodeDocument(reader); - else if 
(headerId.equals("F")) decodeData(reader); + else if (headerId.equals("F")) decodeData(reader, storeWithVariableHeader); } @@ -1099,7 +1099,7 @@ private void decodeDocument(BufferedReader reader) throws IOException { } - private void decodeData(BufferedReader reader) throws IOException { + private void decodeData(BufferedReader reader, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeData(): start"); // TODO: get rid of this "variableTypeFinal"; -- L.A. 4.0 beta int[] variableTypeFinal= new int[varQnty]; @@ -1126,6 +1126,9 @@ private void decodeData(BufferedReader reader) throws IOException { // contents (variable) checker concering decimals Arrays.fill(variableTypeFinal, 0); + if (storeWithVariableHeader) { + pwout.println(StringUtils.join(variableNameList, "\t")); + } // raw-case counter int j = 0; // case diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index eb1353fd792..50f2f89e354 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -473,7 +473,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); @@ -509,7 +509,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8"); - int lineCount = csvFileReader.read(localBufferedReader, dataTable, tabFileWriter); + int lineCount = csvFileReader.read(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); LOG.fine("RDATAFileReader: successfully read "+lineCount+" lines of tab-delimited data."); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java index f60b7733463..fbe7e401b57 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java @@ -61,8 +61,8 @@ public RTabFileParser (char delimiterChar) { // should be used. - public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout) throws IOException { - dbgLog.warning("RTabFileParser: Inside R Tab file parser"); + public int read(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter pwout) throws IOException { + dbgLog.fine("RTabFileParser: Inside R Tab file parser"); int varQnty = 0; @@ -94,14 +94,17 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout boolean[] isTimeVariable = new boolean[varQnty]; boolean[] isBooleanVariable = new boolean[varQnty]; + String variableNameHeader = null; + if (dataTable.getDataVariables() != null) { for (int i = 0; i < varQnty; i++) { DataVariable var = dataTable.getDataVariables().get(i); if (var == null) { - // throw exception! 
+ throw new IOException ("null dataVariable passed to the parser"); + } if (var.getType() == null) { - // throw exception! + throw new IOException ("null dataVariable type passed to the parser"); } if (var.isTypeCharacter()) { isCharacterVariable[i] = true; @@ -128,13 +131,24 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout } } } else { - // throw excepion "unknown variable format type" - ? + throw new IOException ("unknown dataVariable format passed to the parser"); } - + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ? var.getName() + : variableNameHeader.concat("\t" + var.getName()); + } } } else { - // throw exception! + throw new IOException ("null dataVariables list passed to the parser"); + } + + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException ("failed to generate the Variable Names header"); + } + pwout.println(variableNameHeader); } while ((line = csvReader.readLine()) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java index 682b8f1166c..5eecbdfb666 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java @@ -338,7 +338,7 @@ private void init() throws IOException { } } - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException{ dbgLog.info("SAVFileReader: read() start"); if (dataFile != null) { @@ -422,7 +422,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws methodCurrentlyExecuted = "decodeRecordTypeData"; dbgLog.fine("***** SAVFileReader: executing method decodeRecordTypeData"); - decodeRecordTypeData(stream); + decodeRecordTypeData(stream, storeWithVariableHeader); } catch (IllegalArgumentException e) { @@ -2308,7 +2308,7 @@ void decodeRecordType999(BufferedInputStream stream) throws IOException { - void decodeRecordTypeData(BufferedInputStream stream) throws IOException { + void decodeRecordTypeData(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeRecordTypeData(): start"); ///String fileUnfValue = null; @@ -2320,9 +2320,9 @@ void decodeRecordTypeData(BufferedInputStream stream) throws IOException { throw new IllegalArgumentException("stream == null!"); } if (isDataSectionCompressed){ - decodeRecordTypeDataCompressed(stream); + decodeRecordTypeDataCompressed(stream, storeWithVariableHeader); } else { - decodeRecordTypeDataUnCompressed(stream); + decodeRecordTypeDataUnCompressed(stream, storeWithVariableHeader); } /* UNF calculation was here... 
*/ @@ -2362,7 +2362,7 @@ PrintWriter createOutputWriter (BufferedInputStream stream) throws IOException { } - void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****"); @@ -2395,7 +2395,10 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti dbgLog.fine("printFormatTable:\n" + printFormatTable); variableFormatTypeList = new String[varQnty]; - + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < varQnty; i++) { variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE.get( @@ -2947,7 +2950,7 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti } - void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataUnCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****"); if (stream ==null){ @@ -3013,6 +3016,11 @@ void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOExcep ///dataTable2 = new Object[varQnty][caseQnty]; // storage of date formats to pass to UNF ///dateFormats = new String[varQnty][caseQnty]; + + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } try { for (int i = 0; ; i++){ // case-wise loop diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java index ea3f3868f24..ef91793690e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java @@ -36,7 +36,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.poi.xssf.eventusermodel.XSSFReader; -import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.openxml4j.opc.OPCPackage; import org.xml.sax.Attributes; @@ -81,7 +80,9 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. 
*/ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { + // @todo: implement handling of "storeWithVariableHeader" option + init(); TabularDataIngest ingesteddata = new TabularDataIngest(); @@ -118,6 +119,10 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws String[] caseRow = new String[varQnty]; String[] valueTokens; + // add the variable header here, if needed + if (storeWithVariableHeader) { + finalWriter.println(generateVariableHeader(dataTable.getDataVariables())); + } while ((line = secondPassReader.readLine()) != null) { // chop the line: @@ -549,7 +554,7 @@ public static void main(String[] args) throws Exception { BufferedInputStream xlsxInputStream = new BufferedInputStream(new FileInputStream(new File(args[0]))); - TabularDataIngest dataIngest = testReader.read(xlsxInputStream, null); + TabularDataIngest dataIngest = testReader.read(xlsxInputStream, false, null); dataTable = dataIngest.getDataTable(); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 627cef08d8b..3b7632f3d9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -598,7 +598,12 @@ Whether Harvesting (OAI) service is enabled * Allows an instance admin to disable Solr search facets on the collection * and dataset pages instantly */ - DisableSolrFacets + DisableSolrFacets, + /** + * When ingesting tabular data files, store the generated tab-delimited + * files *with* the variable name header line at the top. + */ + StoreIngestedTabularFilesWithVarHeaders ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 3c6992f8ec3..ded394833f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1173,4 +1173,12 @@ public boolean isStorageQuotasEnforced() { public Long getTestStorageQuotaLimit() { return settingsService.getValueForKeyAsLong(SettingsServiceBean.Key.StorageQuotaSizeInBytes); } + /** + * Should we store tab-delimited files produced during ingest *with* the + * variable name header line included? + * @return boolean - defaults to false.
+ */ + public boolean isStoringIngestedFilesWithHeaders() { + return settingsService.isTrueForKey(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders, false); + } } diff --git a/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql new file mode 100644 index 00000000000..7c52a00107a --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql @@ -0,0 +1 @@ +ALTER TABLE datatable ADD COLUMN IF NOT EXISTS storedWithVariableHeader BOOLEAN DEFAULT FALSE; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 915f82a6de2..cfc6f9335b3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -16,6 +16,7 @@ import io.restassured.path.xml.XmlPath; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.io.IOException; @@ -33,6 +34,8 @@ import jakarta.json.JsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.*; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; import org.junit.jupiter.api.AfterAll; @@ -2483,4 +2486,129 @@ public void testCollectionStorageQuotas() { UtilIT.deleteSetting(SettingsServiceBean.Key.UseStorageQuotas); } + + @Test + public void testIngestWithAndWithoutVariableHeader() throws NoSuchAlgorithmException { + msgt("testIngestWithAndWithoutVariableHeader"); + + // The compact Stata file we'll be using for this test: + // (this file is provided by Stata inc. 
- it's of genuine quality) + String pathToFile = "scripts/search/data/tabular/stata13-auto.dta"; + // The pre-calculated MD5 signature of the *complete* tab-delimited + // file as seen by the final Access API user (i.e., with the variable + // header line in it): + String tabularFileMD5 = "f298c2567cc8eb544e36ad83edf6f595"; + // Expected byte sizes of the generated tab-delimited file as stored, + // with and without the header (the 87-byte difference is the length + // of the variable name header line): + int tabularFileSizeWoutHeader = 4026; + int tabularFileSizeWithHeader = 4113; + + String apiToken = createUserGetToken(); + String dataverseAlias = createDataverseGetAlias(apiToken); + Integer datasetIdA = createDatasetGetId(dataverseAlias, apiToken); + + // Before we do anything else, make sure that the instance is configured + // the "old" way, i.e., to store ingested files without the headers: + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Response addResponse = UtilIT.uploadFileViaNative(datasetIdA.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdA = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdA); + + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdA.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(A)"); + + // Check the metadata to confirm that the file has ingested: + + Response fileDataResponse = UtilIT.getFileData(fileIdA.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + .body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWoutHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum: + + Response fileDownloadResponse = UtilIT.downloadFile(fileIdA.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + byte[] fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + messageDigest.update(fileDownloadBytes); + byte[] rawDigestBytes = messageDigest.digest(); + String tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved without the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // Repeat the whole thing, in another dataset (because we will be uploading + // an identical file), but with the "store with the header" setting enabled: + + UtilIT.enableSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Integer datasetIdB = createDatasetGetId(dataverseAlias, apiToken); + + addResponse = UtilIT.uploadFileViaNative(datasetIdB.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdB = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdB);
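The remainder of the test below is the mirror image of the first half: the same file, now stored with the header line, must download byte-identical to the legacy case. The DownloadInstanceWriter changes that guarantee this are not shown in this part of the diff; the following is only a schematic, self-contained sketch of the invariant being asserted, with all names hypothetical:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.nio.charset.StandardCharsets;

public class HeaderDispatchSketch {
    // Hypothetical stand-in for the access-side dispatch (not code from this
    // patch): files stored with the header stream as-is, so direct download
    // is possible; legacy files get the header line prepended on the fly.
    static InputStream deliver(byte[] storedBytes, boolean storedWithHeader, String headerLine) {
        if (storedWithHeader) {
            return new ByteArrayInputStream(storedBytes);
        }
        byte[] header = (headerLine + "\n").getBytes(StandardCharsets.UTF_8);
        return new SequenceInputStream(
                new ByteArrayInputStream(header), new ByteArrayInputStream(storedBytes));
    }

    public static void main(String[] args) throws IOException {
        String header = "make\tmpg";
        String rows = "AMC Concord\t22\n";
        byte[] legacy = rows.getBytes(StandardCharsets.UTF_8);
        byte[] withHeader = (header + "\n" + rows).getBytes(StandardCharsets.UTF_8);
        // Both storage layouts must deliver identical bytes to the user:
        String a = new String(deliver(legacy, false, header).readAllBytes(), StandardCharsets.UTF_8);
        String b = new String(deliver(withHeader, true, header).readAllBytes(), StandardCharsets.UTF_8);
        System.out.println(a.equals(b)); // true
    }
}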
+ + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdB.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(B)"); + + // Check the metadata to confirm that the file has ingested: + + fileDataResponse = UtilIT.getFileData(fileIdB.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + .body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWithHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum again: + + fileDownloadResponse = UtilIT.downloadFile(fileIdB.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + messageDigest.reset(); + messageDigest.update(fileDownloadBytes); + rawDigestBytes = messageDigest.digest(); + tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved with the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // In other words, whether the file was saved with or without the header, + // the end result, as downloaded by the user, must be the same in both cases! + // That is, whether that first line with the variable names is already + // in the physical file, or added by Dataverse on the fly, the downloaded + // content must be identical. + + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + // @todo: cleanup? + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java index 96e314324ab..ca64bcc794f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java @@ -99,7 +99,7 @@ private DataFile readFileCalcFreq(String fileName, String type ) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { tabDataIngest = null; System.out.println("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java index fc066ef195e..9afb35918a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java @@ -52,7 +52,7 @@ public void testRead() { try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - File outFile = instance.read(stream, null).getTabDelimitedFile(); + File outFile = instance.read(stream, false, null).getTabDelimitedFile(); result = new BufferedReader(new FileReader(outFile)); logger.fine("Final pass: " + outFile.getPath()); } catch (IOException ex) { @@ -104,7 +104,7 @@ public void testVariables() { try
(BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - result = instance.read(stream, null).getDataTable(); + result = instance.read(stream, false, null).getDataTable(); } catch (IOException ex) { fail("" + ex); } @@ -154,7 +154,7 @@ public void testSubset() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -195,7 +195,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(floatVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -229,7 +229,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(longVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -256,7 +256,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(stringVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -298,7 +298,7 @@ public void testVariableUNFs() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -327,7 +327,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); } catch (IOException | UnfException ioex) { @@ -345,7 +345,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); @@ -363,7 +363,7 
@@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); String[] dateFormats = null; @@ -401,7 +401,7 @@ public void testVariableUNFs() { public void testBrokenCSV() { String brokenFile = "src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/BrokenCSV.csv"; try { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, false, null); fail("IOException not thrown on null csv"); } catch (NullPointerException ex) { String expMessage = null; @@ -412,7 +412,7 @@ public void testBrokenCSV() { } try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(brokenFile))) { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, false, null); fail("IOException was not thrown when collumns do not align."); } catch (IOException ex) { String expMessage = BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java index 113e9be6b54..8af36d6466d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java @@ -16,7 +16,7 @@ public class DTAFileReaderTest { @Test public void testOs() throws IOException { - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("rel_8_or_9", result.getDataTable().getOriginalFormatVersion()); assertEquals(50, result.getDataTable().getDataVariables().size()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java index c963346b05e..0f14054f472 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java @@ -25,7 +25,7 @@ public void testAuto() throws IOException { instance = new NewDTAFileReader(null, 117); // From https://www.stata-press.com/data/r13/auto.dta // `strings` shows "
117" - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(12, result.getDataTable().getDataVariables().size()); @@ -39,7 +39,7 @@ public void testAuto() throws IOException { @Test public void testStrl() throws IOException { instance = new NewDTAFileReader(null, 118); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), false, nullDataFile); DataTable table = result.getDataTable(); assertEquals("application/x-stata", table.getOriginalFileFormat()); assertEquals("STATA 14", table.getOriginalFormatVersion()); @@ -58,7 +58,7 @@ public void testStrl() throws IOException { @Test public void testDates() throws IOException { instance = new NewDTAFileReader(null, 118); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), false, nullDataFile); DataTable table = result.getDataTable(); assertEquals("application/x-stata", table.getOriginalFileFormat()); assertEquals("STATA 14", table.getOriginalFormatVersion()); @@ -77,7 +77,7 @@ public void testDates() throws IOException { @Test void testNull() { instance = new NewDTAFileReader(null, 117); - assertThrows(IOException.class, () -> instance.read(null, new File(""))); + assertThrows(IOException.class, () -> instance.read(null, false, new File(""))); } // TODO: Can we create a small file to check into the code base that exercises the value-label names non-zero offset issue? 
@@ -87,7 +87,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { instance = new NewDTAFileReader(null, 117); // https://dataverse.harvard.edu/file.xhtml?fileId=2865667 Stata 13 HouseImputingCivilRightsInfo.dta md5=7dd144f27cdb9f8d1c3f4eb9c4744c42 - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(5, result.getDataTable().getDataVariables().size()); @@ -107,7 +107,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { public void testFirstCategoryNonZeroOffset1() throws IOException { instance = new NewDTAFileReader(null, 118); // https://dataverse.harvard.edu/file.xhtml?fileId=3140457 Stata 14: 2018_04_06_Aggregated_dataset_v2.dta - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 14", result.getDataTable().getOriginalFormatVersion()); assertEquals(227, result.getDataTable().getDataVariables().size()); @@ -136,7 +136,7 @@ public void test33k() throws IOException { @Test public void testCharacteristics() throws IOException { instance = new NewDTAFileReader(null, 117); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(441, result.getDataTable().getDataVariables().size());
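Finally, the fourth boolean argument added at every TabularSubsetGenerator call site in this patch implies a simple contract: when the stored tab file carries a variable name header, the subsetter must discard line 1 before collecting column values. Below is a minimal, independent illustration of that contract; it is a sketch of the idea, not the TabularSubsetGenerator implementation:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;

public class SubsetSketch {
    // Extract one column from tab-delimited data, optionally skipping a
    // variable name header line - the contract implied by the new boolean.
    static String[] subsetStringColumn(BufferedReader in, int column,
                                       int caseCount, boolean skipHeader) throws IOException {
        if (skipHeader) {
            in.readLine(); // throw away the variable name line
        }
        String[] result = new String[caseCount];
        for (int i = 0; i < caseCount; i++) {
            result[i] = in.readLine().split("\t", -1)[column];
        }
        return result;
    }

    public static void main(String[] args) throws IOException {
        String tab = "make\tmpg\nAMC Concord\t22\nAMC Pacer\t17\n";
        String[] col = subsetStringColumn(
                new BufferedReader(new StringReader(tab)), 0, 2, true);
        for (String value : col) {
            System.out.println(value); // AMC Concord, AMC Pacer
        }
    }
}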