From 92ca424e72cf94c6a52c6442c475fcaf7990cf1d Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Mon, 18 Sep 2023 05:48:58 -0500 Subject: [PATCH] Cleanup pass on csplit. Whitespace (code style, don't mix space/tab, trailing whitespace, etc). Various "don't break after return, don't return after error_exit()", etc. Move global variables into GLOBALS() block with nonzero init in main. Static local functions. ---
NOTE(review): reviewer commentary only; the commit message above and the diff below are unchanged.
NOTE(review): this copy of the patch has had its line breaks and indentation collapsed, so it will not apply as-is; regenerate it from the commit before use.
NOTE(review): abrt(): the cleanup loop decrements TT.indx but builds each name from TT.findx, which the loop never changes, so every iteration passes the same file name to remove().
NOTE(review): rgmatch(): xregcomp() is called on every invocation and no regfree() appears in the lines shown; the "%[^/%]" conversions passed to sscanf() carry no field width when writing into toybuf.
NOTE(review): rgmatch() and cntxt() are declared int but their error paths end in abrt() with no return; this relies on abrt() being _Noreturn.
NOTE(review): cntxt(): the '/' case still has "break;" after "return"; in "(TT.lineno-TT.tmp) % llv+1" the % is applied before the +1 - confirm that is intended; sscanf "{%lu}" stores into TT.btc, which is declared size_t.
NOTE(review): csplit_main(): filesize is a size_t but is printed with "%ld"; the "%lu" in the filefmt xmprintf() is given "TT.n ? TT.n : 2", which has type long.
toys/pending/csplit.c | 230 ++++++++++++++++++++---------------------- 1 file changed, 111 insertions(+), 119 deletions(-) diff --git a/toys/pending/csplit.c b/toys/pending/csplit.c index 8f053ccad..a9200abdb 100644 --- a/toys/pending/csplit.c +++ b/toys/pending/csplit.c @@ -3,11 +3,8 @@ * Copyright 2023 Oliver Webb * * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/csplit.html - * Deviations From POSIX: - * Does not use %d for file size output - * Doesn't do negitive offsets - * GNU Extension: "{*}" * + * Deviations From POSIX: Add "{*}", file size is %ld, no negative offsets USE_CSPLIT(NEWTOY(csplit, "<2skf:n#", TOYFLAG_USR|TOYFLAG_BIN)) @@ -17,23 +14,22 @@ config CSPLIT help usage: csplit [-ks] [-f PREFIX] [-n INTEGER] file arg... 
- Split files into multiple files based on list of rules - - -k Does not delete Files on error - -s No file output size messages - -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx" - -n [INTEGER] Make all filename numbers [INTEGER] characters long - - Valid Rules: - /regexp/[INTEGER] Break file before line that regexp matches, - %regexp%[INTEGER] - If a offset is specified for these rules, the break will happen [INTEGER] - lines after the regexp match - if a offset is specified, it will break at [INTEGER] lines after the offset - [INTEGER] Break file at line before [INTEGER] - {INTEGER} Repeat Previous Pattern INTEGER Number of times if INTEGER is * - The pattern repeats forever - + Split files into multiple files based on list of rules + + -k Does not delete Files on error + -s No file output size messages + -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx" + -n [INTEGER] Make all filename numbers [INTEGER] characters long + + Valid Rules: + /regexp/[INTEGER] Break file before line that regexp matches, + %regexp%[INTEGER] + If a offset is specified for these rules, the break will happen [INTEGER] + lines after the regexp match + if a offset is specified, it will break at [INTEGER] lines after the offset + [INTEGER] Break file at line before [INTEGER] + {INTEGER} Repeat Previous Pattern INTEGER Number of times if INTEGER is * + The pattern repeats forever */ #define FOR_csplit @@ -42,105 +38,99 @@ config CSPLIT GLOBALS( long n; char *f; -) -size_t indx = 1, findx = 0, lineno = 1; -char *filefmt, *flname, *prefix; -// Variables the context checker need to track between lines -size_t btc = 0, tmp = -1; -int offset = -1, withld = 0, inf = 0; + size_t indx, findx, lineno; + char *filefmt, *prefix; + // Variables the context checker need to track between lines + size_t btc, tmp; + int offset, withld, inf; +) -// This is only int so we can exit cleanly in ternary operators -int abrt(char *err) { +static _Noreturn void abrt(char *err) +{ // Cycle down 
through index instead of keeping track of what files we made - if (!FLAG(k)) for (; indx>=1; indx--) - remove(xmprintf(filefmt, prefix, findx)); + if (!FLAG(k)) for (; TT.indx>=1; TT.indx--) + remove(xmprintf(TT.filefmt, TT.prefix, TT.findx)); error_exit("%s\n", err); - return 1; } -int rgmatch(char *rxrl, char *line, char *fmt) { +static int rgmatch(char *rxrl, char *line, char *fmt) +{ regex_t rxp; int rr; - sscanf(rxrl,fmt, toybuf, &offset); + + sscanf(rxrl,fmt, toybuf, &TT.offset); xregcomp(&rxp, toybuf, 0); rr = regexec(&rxp, line, 0, 0, 0); if (!rr) return 1; else if (rr == REG_NOMATCH) return 0; - return abrt("bad regex"); + abrt("bad regex"); } -int cntxt(char *line, char *rule) { +static int cntxt(char *line, char *rule) +{ size_t llv; - if (indx == toys.optc) return 0; + if (TT.indx == toys.optc) return 0; - if (offset < 0); - else if (offset == 0) { - offset = -1; - return 1; + if (TT.offset < 0); + else if (TT.offset == 0) { + TT.offset = -1; + + return 1; } else { - offset--; - return 0; + TT.offset--; + + return 0; } switch (rule[0]) { - - case '/': - return rgmatch(rule, line, "/%[^/%]/%d"); - break; - case '%': - withld = 1; - return rgmatch(rule, line, "%%%[^/%]%%%d"); - break; - - case '{': - if (indx < 2) abrt("bad rule order"); - - // GNU extention: {*} - if (!strcmp(rule,"{*}")){ - btc = -1; - inf = 1; - } else if (!sscanf(rule,"{%lu}",&btc)) - abrt("bad rule"); - - if (tmp == -1) tmp = lineno; - if ((llv = atoll(toys.optargs[indx-1]))) { - if (((lineno-tmp) % llv+1) == llv) { - tmp = -1; - indx--; - return 1; - } else return 0; - } - - if (cntxt(line, toys.optargs[indx-1])) { - // Manipulate the rule then return to it later so we create a - // new file but are still on the same rule. 
This is the only - // reason why we differentiate between rule and file Index - if (btc != 1) { - toys.optargs[indx] = xmprintf("{%lu}",btc-1); - indx--; - } - return 1; - } - return 0; - break; - - default: - if (lineno > atoll(rule)) { - abrt("bad rule order"); - } else if (!(atoll(rule))) { - abrt("bad rule"); - } else { - if (lineno == atoll(rule)) offset++; - return 0; - } - break; + case '/': + return rgmatch(rule, line, "/%[^/%]/%d"); + break; + + case '%': + TT.withld = 1; + return rgmatch(rule, line, "%%%[^/%]%%%d"); + + case '{': + if (TT.indx < 2) abrt("bad rule order"); + + if (!strcmp(rule,"{*}")) { + TT.btc = -1; + TT.inf = 1; + } else if (!sscanf(rule,"{%lu}",&TT.btc)) abrt("bad rule"); + + if (TT.tmp == -1) TT.tmp = TT.lineno; + if ((llv = atoll(toys.optargs[TT.indx-1]))) { + if (((TT.lineno-TT.tmp) % llv+1) == llv) { + TT.tmp = -1; + TT.indx--; + + return 1; + } else return 0; + } + + if (cntxt(line, toys.optargs[TT.indx-1])) { + // Manipulate the rule then return to it later so we create a + // new file but are still on the same rule. This is the only + // reason why we differentiate between rule and file Index + if (TT.btc != 1) { + toys.optargs[TT.indx] = xmprintf("{%lu}",TT.btc-1); + TT.indx--; + } + return 1; + } + return 0; + + default: + if (TT.lineno > atoll(rule)) abrt("bad rule order"); + else if (!(atoll(rule))) abrt("bad rule"); + else { + if (TT.lineno == atoll(rule)) TT.offset++; + return 0; + } } - - // The code should never get to this point without returning something - perror_exit("Error"); - return 1; } void csplit_main(void) @@ -150,32 +140,34 @@ void csplit_main(void) char *line; size_t filesize = 0; + TT.indx = TT.lineno = 1; + TT.tmp = TT.offset = -1; + // -f and -n formatting - filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2); - prefix = TT.f ? TT.f : "xx"; + TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2); + TT.prefix = TT.f ? 
TT.f : "xx"; - actvfile = xfopen(xmprintf(filefmt, prefix, findx), "w+"); + actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+"); for (; (line = xgetline(fin)); free(line)) { - lineno++; - filesize += strlen(line)+1; - - if (cntxt(line, toys.optargs[indx])) { - - if (!withld) { - fclose(actvfile); - if (!FLAG(s)) printf("%ld\n", filesize); - filesize = 0; - findx++; - actvfile = xfopen(xmprintf(filefmt, prefix, findx), "w+"); - } - - indx++; - withld = 0; - } - if (!withld) fprintf(actvfile, "%s\n", line); + TT.lineno++; + filesize += strlen(line)+1; + + if (cntxt(line, toys.optargs[TT.indx])) { + if (!TT.withld) { + fclose(actvfile); + if (!FLAG(s)) printf("%ld\n", filesize); + filesize = 0; + TT.findx++; + actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+"); + } + + TT.indx++; + TT.withld = 0; + } + if (!TT.withld) fprintf(actvfile, "%s\n", line); } if (!FLAG(s)) printf("%ld\n", filesize); // Abort Case: Not All Rules Processed - if (!((indx == toys.optc) || inf)) abrt("Rules not processed"); + if (!((TT.indx == toys.optc) || TT.inf)) abrt("Rules not processed"); }