[#95] MUPIP LOAD on an empty ZWR file incorrectly should report 0 rec…

…ords loaded (instead of MAXSTRLEN/LDBINFMT errors). The main change is to get_load_format() in sr_unix/mupip_cvtgbl.c. It assumed that, in the case of a non-binary format extract file (MU_FMT_GO or MU_FMT_ZWR), there is always a non-empty 3rd line in the extract file. Although it is not possible for YottaDB (%GO or MUPIP EXTRACT) to create an empty extract file, it is possible for the user to delete entries in a ZWR or GO format extract file (which is an ASCII file), effectively creating a file with just the first 2 lines (the header). In that case, MUPIP LOAD currently issues a MAXSTRLEN error (in get_load_format()). This is because get_load_format() assumes that a negative return from go_get() implies a string that is too long to fit in the passed-in buffer. But there are 2 negative return values possible from that function: FILE_INPUT_GET_LINE2LONG and FILE_INPUT_GET_ERROR. A MAXSTRLEN error is correct in the former case but not in the latter case, which effectively implies an EOF. Therefore the latter case is now handled separately by not issuing an error and instead returning as is to the caller function mupip_cvtgbl(). At this point, we are guaranteed that *line3_len is set to 0, indicating that no 3rd line was seen. In mupip_cvtgbl(), line3_len is examined to see if it is 0; if so, the function knows the extract file has only 2 lines and sets "end" to 2 (corresponding to the 2 lines of the extract file header) before calling go_load(), which then knows not to read the extract file any further (i.e. there are no records to load). While at it, minor changes were made to go_load() so that the last processed record in the LOADRECCNT message is displayed as 2 (and not 3) in this empty-extract-file case. A value of 3 implies there was 1 non-header line, which is not correct for an empty extract file.
YottaDB · Sep 18, 2018 · 5ae6d39 · 5ae6d39
1 parent 0b74bdc
commit 5ae6d39
Show file tree

Hide file tree

Showing 2 changed files with 126 additions and 117 deletions.
diff --git a/sr_unix/go_load.c b/sr_unix/go_load.c
@@ -210,7 +210,7 @@ void go_load(uint4 begin, uint4 end, unsigned char *rec_buff, char *line3_ptr, i
 		{
 			util_out_print("!AD:!_  Key cnt: !@UQ  max subsc len: !UL  max data len: !UL", TRUE,
 				       LEN_AND_LIT("LOAD TOTAL"), &key_count, max_subsc_len, max_data_len);
-			tmp_rec_count = (iter == begin) ? iter : iter - 1;
+			tmp_rec_count = iter - 1;
 			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(3) MAKE_MSG_INFO(ERR_LOADRECCNT), 1, &tmp_rec_count);
 			mu_gvis();
 			util_out_print(0, TRUE);
@@ -406,9 +406,9 @@ void go_load(uint4 begin, uint4 end, unsigned char *rec_buff, char *line3_ptr, i
 	if (mupip_error_occurred && ONERROR_STOP == onerror)
 	{
 		tmp_rec_count = (go_format_val_read) ? iter - 1 : iter;
-		failed_record_count-=(go_format_val_read) ? 1 : 0;
+		failed_record_count -= (go_format_val_read) ? 1 : 0;
 	} else
-		tmp_rec_count = (iter == begin) ? iter : iter - 1;
+		tmp_rec_count = iter - 1;
 	if (0 != first_failed_rec_count)
 	{
 		if (tmp_rec_count > first_failed_rec_count)

diff --git a/sr_unix/mupip_cvtgbl.c b/sr_unix/mupip_cvtgbl.c
@@ -3,6 +3,9 @@
  * Copyright (c) 2001-2017 Fidelity National Information	*
  * Services, Inc. and/or its subsidiaries. All rights reserved.	*
  *								*
+ * Copyright (c) 2018 YottaDB LLC. and/or its subsidiaries.	*
+ * All rights reserved.						*
+ *								*
  *	This source code contains the intellectual property	*
  *	of its copyright holder(s), and is made available	*
  *	under a license.  If you do not know the terms of	*
@@ -172,6 +175,12 @@ void mupip_cvtgbl(void)
 		mupip_exit(ERR_LDBINFMT);
 	if (BADZCHSET == utf8)
 		mupip_exit(ERR_MUNOFINISH);
+	/* Check if "line3_len" is set to 0 by "get_load_format". If so, it means the extract file is empty (contains just
+	 * the 2 header lines). If so, set "end" (the last record # to be loaded) to 2 to indicate "go_load" should do no loading.
+	 * But do this only if the format has been determined to be GO or ZWR.
+	 */
+	if (((MU_FMT_GO == file_format) || (MU_FMT_ZWR == file_format)) && (0 == line3_len) && (2 < end))
+		end = 2;
 	if (cli_present("FORMAT") == CLI_PRESENT)
 	{	/* If the command speficies a format see if it matches the label */
 		len = SIZEOF(buff);
@@ -244,130 +253,130 @@ int get_load_format(char **line1_ptr, char **line3_ptr, int *line1_len, int *lin
 	*line1_len = file_input_read_xchar(line1, CHAR_TO_READ_LINE1_BIN);
 	*dos = *line3_len = *utf8_extract = 0;
 	ret = MU_FMT_UNRECOG;		/* actually means as yet undetermined; used to decide if still trying to find a format */
-	if (0 < *line1_len)
-	{
-		if (0 == STRNCMP_LIT(line1 + 6, "BINARY")) /* If file is binary do not look further */
-			return MU_FMT_BINARY;
-		for (line2_len = 0, c = line1, ctop = c + *line1_len; c < ctop; c++)
-		{	/* that 1st read is fixed length, so look for a terminator */
+	if (0 >= *line1_len)
+		return MU_FMT_GOQ;
+	if (0 == STRNCMP_LIT(line1 + 6, "BINARY")) /* If file is binary do not look further */
+		return MU_FMT_BINARY;
+	for (line2_len = 0, c = line1, ctop = c + *line1_len; c < ctop; c++)
+	{	/* that 1st read is fixed length, so look for a terminator */
+		if ('\n' == *c)
+		{	/* found a terminator */
+			line2 = c + 1;
+			line2_len = *line1_len - (line2 - line1);
+			*line1_len -= (line2_len + 1);
+			break;
+		}
+	}
+	if (c == ctop)
+	{	/* did not find a terminator - read some more of 1st line */
+		ptr = c;
+		if (0 <= (len = go_get(&ptr, 0, max_io_size)))		/* WARNING assignment */
+			*line1_len += len;
+		else
+		{	/* chances of this are small but we are careful not to overflow buffers */
+			mupip_error_occurred = TRUE;
+			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
+		}
+		line2_len = 0;
+		line2 = line1 + *line1_len;
+	} else if (line2_len)
+	{	/* If line1 length is actually < 12 chars, the buffer has characters from line2 as well */
+		for (c = line2, ctop = c + line2_len; c < ctop; c++)
+		{	/* look for a line 2 terminator */
 			if ('\n' == *c)
 			{	/* found a terminator */
-				line2 = c + 1;
-				line2_len = *line1_len - (line2 - line1);
-				*line1_len -= (line2_len + 1);
+				*line3_len = line2_len - (c - line2 + 1);
+				line2_len = c - line2;
 				break;
 			}
 		}
-		if (c == ctop)
-		{	/* did not find a terminator - read some more of 1st line */
-			ptr = c;
-			if (0 <= (len = go_get(&ptr, 0, max_io_size)))		/* WARNING assignment */
-				*line1_len += len;
-			else
-			{	/* chances of this are small but we are careful not to overflow buffers */
-				mupip_error_occurred = TRUE;
-				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
-			}
+	}
+	c1 = line1 + *line1_len;
+	*c1-- = 0;				/* null terminate the line to keep util_out_print happy */
+	if (*dos = ('\r' == *c1))		/* WARNING assignment */
+	{	/* [cariage] return before the <LF> / new line - we'll need to keep stripping them off */
+		*line1_len -= 1;
+		*c1 = 0;			/* null terminate earlier to keep util_out_print happy */
+	}
+	util_out_print("!AD", TRUE, *line1_len, line1);
+	if ((0 == line2_len) || (c == ctop))
+	{	/* need to get at least some more of 2nd line */
+		ptr = line2 + line2_len;
+		if (0 < (len = go_get(&ptr, 0, max_io_size)))		/* WARNING assignment */
+			line2_len += len;
+		else
+		{	/* chances of this are small but we are careful not to overflow buffers */
+			ret = MU_FMT_GOQ;	/* abusing this value to mean not working, as we can't discover GOQ */
 			line2_len = 0;
-			line2 = line1 + *line1_len;
-		} else if (line2_len)
-		{	/* If line1 length is actually < 12 chars, the buffer has characters from line2 as well */
-			for (c = line2, ctop = c + line2_len; c < ctop; c++)
-			{	/* look for a line 2 terminator */
-				if ('\n' == *c)
-				{	/* found a terminator */
-					*line3_len = line2_len - (c - line2 + 1);
-					line2_len = c - line2;
-					break;
-				}
-			}
+			mupip_error_occurred = TRUE;
+			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
 		}
-		c1 = line1 + *line1_len;
-		*c1-- = 0;				/* null terminate the line to keep util_out_print happy */
-		if (*dos = ('\r' == *c1))		/* WARNING assignment */
-		{	/* [cariage] return before the <LF> / new line - we'll need to keep stripping them off */
-			*line1_len -= 1;
-			*c1 = 0;			/* null terminate earlier to keep util_out_print happy */
+	}
+	if (0 >= line2_len)
+		return MU_FMT_GOQ;
+	/* we have 2 label lines to work with */
+	line2_len -= *dos;
+	c1 = line2 + line2_len;
+	*c1 = 0;	/* null terminate the line to keep regex in bounds */
+	util_out_print("!AD", TRUE, line2_len, line2);
+	if (gtm_regex_perf("ZWR", line2))
+		ret = MU_FMT_ZWR;		/* settle for any ZWR in the second line of the label */
+	if ((MU_FMT_UNRECOG == ret) &&
+			gtm_regex_perf("(GT.M )?[0-9]{2}[-]([A-Z]{3})[-][0-9]{4}[ ]{1,2}[0-9]{2}[:][0-9]{2}[:][0-9]{2}", line2))
+		ret = MU_FMT_GO;	/* GT.M DD-MON-YEAR  24:60:SS used by MUPIP EXTRACT & %GO */
+	if ((MU_FMT_UNRECOG == ret) && gtm_regex_perf("GLO", line2))
+		ret = MU_FMT_GO;	/* settle for any GLO in the second line of the label */
+	for (c = line2 + line2_len + 1, ctop = c + *line3_len, c1 = line3; c < ctop; c++)
+	{	/* if the first 2 lines were really short, move to other buffer looking for a line 3 terminator */
+		if ('\n' == *c)
+		{	/* found a terminator */
+			*line3_len = c1 - line3;
+			break;
+		} else
+			*c1 = *c;
+	}
+	if (c == ctop)
+	{	/* get all or some of line 3 - the first non-label line */
+		ptr = line3 + *line3_len;
+		if (0 < (len = go_get(&ptr, 0, *max_rec_size)))
+		{
+			*line3_len += (len - *dos);
+			c1 = line3 + *line3_len;
+			*c1 = 0;		/* null terminate the line to keep regex in bounds */
+		} else if (FILE_INPUT_GET_LINE2LONG == len)
+		{	/* chances of this are small but we are careful not to overflow buffers */
+			ret = MU_FMT_GOQ;
+			mupip_error_occurred = TRUE;
+			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
+		} else
+		{
+			assert(FILE_INPUT_GET_ERROR == len);
+			/* This is an EOF situation. That is, the extract file has only 2 lines and no records to load. */
+			assert(0 == *line3_len);
 		}
-		util_out_print("!AD", TRUE, *line1_len, line1);
-		if ((0 == line2_len) || (c == ctop))
-		{	/* need to get at least some more of 2nd line */
-			ptr = line2 + line2_len;
-			if (0 < (len = go_get(&ptr, 0, max_io_size)))		/* WARNING assignment */
-				line2_len += len;
+	} else
+	{
+		*line3_len = 0;
+		ret = MU_FMT_GOQ;	/* abusing this value to mean not working, as we can't discover GOQ */
+	}
+	if ((MU_FMT_UNRECOG == ret) && *line3_len && gtm_regex_perf("\\^[%A-Za-z][0-9A-Za-z]*(\\(.*\\))?$", line3))
+		ret = MU_FMT_GO;	/* gvn only */
+	if ((MU_FMT_UNRECOG == ret) && *line3_len
+			&& gtm_regex_perf("\\^[%A-Za-z][0-9A-Za-z]*(\\(.*\\))?=(\".*\"|-?([0-9]+|[0-9]*\\.[0-9]+))$", line3))
+		ret = MU_FMT_ZWR;	 /* gvn=val */
+	if (MU_FMT_UNRECOG != ret)
+	{
+		*utf8_extract = gtm_regex_perf("UTF-8", line1);
+		if ((*utf8_extract && !gtm_utf8_mode) || (!*utf8_extract && gtm_utf8_mode))
+		{	/* extract CHSET doesn't match current $ZCHSET */
+			if (*utf8_extract)
+				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_LOADINVCHSET,
+					2, LEN_AND_LIT("UTF-8"));
 			else
-			{	/* chances of this are small but we are careful not to overflow buffers */
-				ret = MU_FMT_GOQ;	/* abusing this value to mean not working, as we can't discover GOQ */
-				line2_len = 0;
-				mupip_error_occurred = TRUE;
-				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
-			}
+				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_LOADINVCHSET, 2, LEN_AND_LIT("M"));
+			*utf8_extract = BADZCHSET;
 		}
-		if (0 < line2_len)
-		{	/* we have 2 label lines to work with */
-			line2_len -= *dos;
-			c1 = line2 + line2_len;
-			*c1 = 0;	/* null terminate the line to keep regex in bounds */
-			util_out_print("!AD", TRUE, line2_len, line2);
-			if (gtm_regex_perf("ZWR", line2))
-				ret = MU_FMT_ZWR;		/* settle for any ZWR in the second line of the label */
-			if ((MU_FMT_UNRECOG == ret) &&
-				gtm_regex_perf("(GT.M )?[0-9]{2}[-]([A-Z]{3})[-][0-9]{4}[ ]{1,2}[0-9]{2}[:][0-9]{2}[:][0-9]{2}",
-					line2))
-				ret = MU_FMT_GO;	/* GT.M DD-MON-YEAR  24:60:SS used by MUPIP EXTRACT & %GO */
-			if ((MU_FMT_UNRECOG == ret) && gtm_regex_perf("GLO", line2))
-				ret = MU_FMT_GO;	/* settle for any GLO in the second line of the label */
-			for (c = line2 + line2_len + 1, ctop = c + *line3_len, c1 = line3; c < ctop; c++)
-			{	/* if the first 2 lines were really short, move to other buffer looking for a line 3 terminator */
-				if ('\n' == *c)
-				{	/* found a terminator */
-					*line3_len = c1 - line3;
-					break;
-				} else
-					*c1 = *c;
-			}
-			if (c == ctop)
-			{	/* get all or some of line 3 - the first non-label line */
-				ptr = line3 + *line3_len;
-				if (0 < (len = go_get(&ptr, 0, *max_rec_size)))
-				{
-					*line3_len += (len - *dos);
-					c1 = line3 + *line3_len;
-					*c1 = 0;		/* null terminate the line to keep regex in bounds */
-				} else
-				{	/* chances of this are small but we are careful not to overflow buffers */
-					ret = MU_FMT_GOQ;
-					mupip_error_occurred = TRUE;
-					gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
-				}
-			} else
-			{
-				*line3_len = 0;
-				ret = MU_FMT_GOQ;	/* abusing this value to mean not working, as we can't discover GOQ */
-			}
-			if ((MU_FMT_UNRECOG == ret) && gtm_regex_perf("\\^[%A-Za-z][0-9A-Za-z]*(\\(.*\\))?$", line3))
-				ret = MU_FMT_GO;	/* gvn only */
-			if ((MU_FMT_UNRECOG == ret)
-				&& gtm_regex_perf("\\^[%A-Za-z][0-9A-Za-z]*(\\(.*\\))?=(\".*\"|-?([0-9]+|[0-9]*\\.[0-9]+))$",
-					line3))
-				ret = MU_FMT_ZWR;	 /* gvn=val */
-			if (MU_FMT_UNRECOG != ret)
-			{
-				*utf8_extract = gtm_regex_perf("UTF-8", line1);
-				if ((*utf8_extract && !gtm_utf8_mode) || (!*utf8_extract && gtm_utf8_mode))
-				{	/* extract CHSET doesn't match current $ZCHSET */
-					if (*utf8_extract)
-						gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_LOADINVCHSET,
-							2, LEN_AND_LIT("UTF-8"));
-					else
-						gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_LOADINVCHSET, 2, LEN_AND_LIT("M"));
-					*utf8_extract = BADZCHSET;
-				}
-			}
-		} else
-			return MU_FMT_GOQ;
-	} else
-		return MU_FMT_GOQ;
+	}
 	*max_rec_size = (MU_FMT_GO == ret) ? MAX_STRLEN : *max_rec_size;		/* for GO, keys are separate */
 	return MU_FMT_GOQ == ret ? MU_FMT_UNRECOG : ret;				/* turn the GOQs back into unrecognized */
 }