diff --git a/src/ConfigParser.cc b/src/ConfigParser.cc index d9e7a2456ea..fb1d86aea3e 100644 --- a/src/ConfigParser.cc +++ b/src/ConfigParser.cc @@ -16,13 +16,14 @@ #include "fatal.h" #include "globals.h" #include "neighbors.h" +#include "parser/Tokenizer.h" #include "sbuf/Stream.h" bool ConfigParser::RecognizeQuotedValues = true; bool ConfigParser::StrictMode = true; std::stack ConfigParser::CfgFiles; ConfigParser::TokenType ConfigParser::LastTokenType = ConfigParser::SimpleToken; -const char *ConfigParser::CfgLine = nullptr; +SBuf ConfigParser::CfgLine; const char *ConfigParser::CfgPos = nullptr; std::queue ConfigParser::CfgLineTokens_; bool ConfigParser::AllowMacros_ = false; @@ -201,10 +202,10 @@ ConfigParser::UnQuote(const char *token, const char **next) } void -ConfigParser::SetCfgLine(char *line) +ConfigParser::SetCfgLine(const SBuf &line) { CfgLine = line; - CfgPos = line; + CfgPos = CfgLine.c_str(); while (!CfgLineTokens_.empty()) { char *token = CfgLineTokens_.front(); CfgLineTokens_.pop(); @@ -561,6 +562,20 @@ ConfigParser::rejectDuplicateDirective() throw TextException("duplicate configuration directive", Here()); } +SBuf +ConfigParser::openDirective(const SBuf &line) +{ + Parser::Tokenizer tk(line); + SBuf directiveName; // TODO: Upgrade cfg_directive to a ConfigParser member (with SBuf type) and set it here. + static const auto &spaceChars = CharacterSet::WSP; + static const auto directiveChars = spaceChars.complement("squid.conf directive name"); + const auto found = tk.prefix(directiveName, directiveChars); + Assure(found); // our callers are expected to fully handle non-directive lines + tk.skipAll(spaceChars); + SetCfgLine(tk.remaining()); // may be empty + return directiveName; +} + void ConfigParser::closeDirective() { diff --git a/src/ConfigParser.h b/src/ConfigParser.h index 1ef9577b4ab..b72367dbbd8 100644 --- a/src/ConfigParser.h +++ b/src/ConfigParser.h @@ -54,6 +54,11 @@ class ConfigParser void destruct(); + /// starts parsing the current configuration directive + /// \returns directive name + /// \sa closeDirective() + SBuf openDirective(const SBuf &line); + /// stops parsing the current configuration directive void closeDirective(); @@ -137,8 +142,8 @@ class ConfigParser */ static char *PeekAtToken(); - /// Set the configuration file line to parse. - static void SetCfgLine(char *line); + /// Set current directive parameters (i.e. characters after the directive name). + static void SetCfgLine(const SBuf &); /// Allow %macros inside quoted strings static void EnableMacros() {AllowMacros_ = true;} @@ -221,8 +226,12 @@ class ConfigParser static char *NextElement(TokenType &type); static std::stack CfgFiles; ///< The stack of open cfg files static TokenType LastTokenType; ///< The type of last parsed element - static const char *CfgLine; ///< The current line to parse - static const char *CfgPos; ///< Pointer to the next element in cfgLine string + + static SBuf CfgLine; ///< Directive parameters being parsed; \sa SetCfgLine() + + // TODO: Replace with a Tokenizer or a similar iterative parsing class + static const char *CfgPos; ///< Pointer to the next element in CfgLine string + static std::queue CfgLineTokens_; ///< Store the list of tokens for current configuration line static bool AllowMacros_; static bool ParseQuotedOrToEol_; ///< The next tokens will be handled as quoted or to_eol token diff --git a/src/Makefile.am b/src/Makefile.am index 9ad46121904..7bbd3e9305d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2748,7 +2748,6 @@ tests_testConfigParser_SOURCES = \ tests/testConfigParser.cc nodist_tests_testConfigParser_SOURCES = \ ConfigParser.cc \ - tests/stub_SBuf.cc \ String.cc \ tests/stub_acl.cc \ tests/stub_cache_cf.cc \ @@ -2757,6 +2756,8 @@ nodist_tests_testConfigParser_SOURCES = \ tests/stub_libmem.cc \ tests/stub_neighbors.cc tests_testConfigParser_LDADD = \ + parser/libparser.la \ + sbuf/libsbuf.la \ base/libbase.la \ $(LIBCPPUNIT_LIBS) \ $(COMPAT_LIB) \ diff --git a/src/cache_cf.cc b/src/cache_cf.cc index 90875f9bdde..435c67e5e52 100644 --- a/src/cache_cf.cc +++ b/src/cache_cf.cc @@ -55,6 +55,7 @@ #include "mgr/Registration.h" #include "neighbors.h" #include "NeighborTypeDomainList.h" +#include "parser/Tokenizer.h" #include "Parsing.h" #include "pconn.h" #include "PeerDigest.h" @@ -181,7 +182,7 @@ static void parse_string(char **); static void default_all(void); static void defaults_if_none(void); static void defaults_postscriptum(void); -static int parse_line(char *); +static int parse_line(const SBuf &); static void parse_obsolete(const char *); static void parseBytesLine(size_t * bptr, const char *units); static void parseBytesLineSigned(ssize_t * bptr, const char *units); @@ -218,7 +219,7 @@ static int check_null_IpAddress_list(const Ip::Address_list *); #endif /* USE_WCCPv2 */ static void parsePortCfg(AnyP::PortCfgPointer *, const char *protocol); -#define parse_PortCfg(l) parsePortCfg((l), token) +#define parse_PortCfg(l) parsePortCfg((l), cfg_directive) static void dump_PortCfg(StoreEntry *, const char *, const AnyP::PortCfgPointer &); #define free_PortCfg(h) *(h)=NULL @@ -311,20 +312,23 @@ skip_ws(const char* s) return s; } +/// Parsers included configuration files identified by their filenames or glob +/// patterns and included at the given nesting level (a.k.a. depth). +/// For example, parses include files in `include /path/to/include/files/*.acl`. +/// \returns the number of errors (that did not immediately terminate parsing) static int -parseManyConfigFiles(char* files, int depth) +parseIncludedConfigFiles(const SBuf &paths, const int depth) { int error_count = 0; - char* saveptr = nullptr; + Parser::Tokenizer tk(paths); #if HAVE_GLOB - char *path; glob_t globbuf; int i; memset(&globbuf, 0, sizeof(globbuf)); - for (path = strwordtok(files, &saveptr); path; path = strwordtok(nullptr, &saveptr)) { - if (glob(path, globbuf.gl_pathc ? GLOB_APPEND : 0, nullptr, &globbuf) != 0) { - int xerrno = errno; - fatalf("Unable to find configuration file: %s: %s", path, xstrerr(xerrno)); + while (auto path = NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(tk)) { + if (glob(path->c_str(), globbuf.gl_pathc ? GLOB_APPEND : 0, nullptr, &globbuf) != 0) { + const auto xerrno = errno; + throw TextException(ToSBuf("Unable to find configuration file: ", *path, ": ", xstrerr(xerrno)), Here()); } } for (i = 0; i < (int)globbuf.gl_pathc; ++i) { @@ -332,118 +336,195 @@ parseManyConfigFiles(char* files, int depth) } globfree(&globbuf); #else - char* file = strwordtok(files, &saveptr); - while (file != NULL) { - error_count += parseOneConfigFile(file, depth); - file = strwordtok(nullptr, &saveptr); + while (auto path = NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(tk)) { + error_count += parseOneConfigFile(path->c_str(), depth); } #endif /* HAVE_GLOB */ return error_count; } +/// Replaces all occurrences of macroName in buf with macroValue. When looking +/// for the next macroName occurrence, this one-scan algorithm does not revisit +/// previously scanned buf areas and does not visit replaced values. static void -ReplaceSubstr(char*& str, int& len, unsigned substrIdx, unsigned substrLen, const char* newSubstr) +SubstituteMacro(SBuf &buf, const SBuf ¯oName, const SBuf ¯oValue) { - assert(str != nullptr); - assert(newSubstr != nullptr); - - unsigned newSubstrLen = strlen(newSubstr); - if (newSubstrLen > substrLen) - str = (char*)realloc(str, len - substrLen + newSubstrLen + 1); - - // move tail part including zero - memmove(str + substrIdx + newSubstrLen, str + substrIdx + substrLen, len - substrIdx - substrLen + 1); - // copy new substring in place - memcpy(str + substrIdx, newSubstr, newSubstrLen); + SBuf remainingInput = buf; + buf.clear(); + while (!remainingInput.isEmpty()) { + const auto pos = remainingInput.find(macroName); + if (pos == SBuf::npos) { + buf.append(remainingInput); + return; + } - len = strlen(str); + buf.append(remainingInput.substr(0, pos)); + buf.append(macroValue); + remainingInput.chop(pos + macroName.length()); + } } static void -SubstituteMacro(char*& line, int& len, const char* macroName, const char* substStr) +ProcessMacros(SBuf &buf) { - assert(line != nullptr); - assert(macroName != nullptr); - assert(substStr != nullptr); - unsigned macroNameLen = strlen(macroName); - while (const char* macroPos = strstr(line, macroName)) // we would replace all occurrences - ReplaceSubstr(line, len, macroPos - line, macroNameLen, substStr); + static const auto macroServiceName = SBuf("${service_name}"); + static const auto macroProcessName = SBuf("${process_name}"); + static const auto macroProcessNumber = SBuf("${process_number}"); + static const auto kidIdentifier = ToSBuf(KidIdentifier); + SubstituteMacro(buf, macroServiceName, service_name); + SubstituteMacro(buf, macroProcessName, TheKidName); + SubstituteMacro(buf, macroProcessNumber, kidIdentifier); } -static void -ProcessMacros(char*& line, int& len) +/// extracts all leading space characters (if any) +/// \returns whether at least one character was extracted +static bool +SkipOptionalSpace(Parser::Tokenizer &tk) { - SubstituteMacro(line, len, "${service_name}", service_name.c_str()); - SubstituteMacro(line, len, "${process_name}", TheKidName.c_str()); - SubstituteMacro(line, len, "${process_number}", xitoa(KidIdentifier)); + return tk.skipAll(CharacterSet::WSP); } -static void -trim_trailing_ws(char* str) +/// extracts all (and at least one) characters matching tokenChars surrounded by optional space +static SBuf +ExtractToken(const char * const description, Parser::Tokenizer &tk, const CharacterSet &tokenChars) +{ + const auto savedTk = tk; + + (void)SkipOptionalSpace(tk); + + SBuf token; + if (tk.prefix(token, tokenChars)) { + (void)SkipOptionalSpace(tk); + return token; + } + + tk = savedTk; + throw TextException(ToSBuf("cannot find ", description, " near ", tk.remaining()), Here()); +} + +/// extracts an operand of a preprocessor condition +static SBuf +ExtractOperand(const char * const description, Parser::Tokenizer &tk) { - assert(str != nullptr); - unsigned i = strlen(str); - while ((i > 0) && IsSpace(str[i - 1])) - --i; - str[i] = '\0'; + static const auto operandChars = (CharacterSet::ALPHA + CharacterSet::DIGIT).add('-').add('+').rename("preprocessor condition operand"); + return ExtractToken(description, tk, operandChars); } -static const char* -FindStatement(const char* line, const char* statement) +/// extracts an operator of a preprocessor condition +static SBuf +ExtractOperator(const char * const description, Parser::Tokenizer &tk) { - assert(line != nullptr); - assert(statement != nullptr); + static const auto operatorChars = CharacterSet("preprocessor condition operator", "<=>%/*^!"); + return ExtractToken(description, tk, operatorChars); +} - const char* str = skip_ws(line); - unsigned len = strlen(statement); - if (strncmp(str, statement, len) == 0) { - str += len; - if (*str == '\0') - return str; - else if (IsSpace(*str)) - return skip_ws(str); +/// throws on non-empty remaining input +static void +RejectTrailingGarbage(const char * const parsedInputDescription, const SBuf &parsedInput, const Parser::Tokenizer &tk) +{ + if (!tk.atEnd()) { + throw TextException(ToSBuf("found trailing garbage after parsing ", + parsedInputDescription, ' ', parsedInput, ": ", + tk.remaining()), Here()); } +} - return nullptr; +/// interprets the given raw string as a signed integer (in decimal, hex, or +/// octal base per Parser::Tokenizer::int64()) +static int64_t +EvalNumber(const SBuf &raw) +{ + auto numberParser = Parser::Tokenizer(raw); + int64_t result = 0; + if (!numberParser.int64(result, 0, true)) + throw TextException(ToSBuf("malformed integer near ", raw), Here()); + RejectTrailingGarbage("integer", raw, numberParser); + return result; } +/// IsIfStatementOpening() helper that interprets input prefix as a preprocessor condition static bool -StrToInt(const char* str, long& number) +EvalBoolExpr(Parser::Tokenizer &tk) { - assert(str != nullptr); + const auto operand = ExtractOperand("preprocessor condition", tk); + + static const auto keywordTrue = SBuf("true"); + if (operand == keywordTrue) + return true; - char* end; - number = strtol(str, &end, 0); + static const auto keywordFalse = SBuf("false"); + if (operand == keywordFalse) + return false; + + const auto lhs = operand; + + const auto op = ExtractOperator("equality sign in an equality condition", tk); + static const auto keywordEqual = SBuf("="); + if (op != keywordEqual) + throw TextException(ToSBuf("expected equality sign (=) but got ", op), Here()); + + const auto rhs = ExtractOperand("right-hand operand of an equality condition", tk); + return EvalNumber(lhs) == EvalNumber(rhs); +} + +/// interprets input as the first line of a preprocessor `if` statement +/// \returns std::nullopt if input does not look like an `if` statement +/// \returns `if` condition value if input is an `if` statement +static std::optional +IsIfStatementOpening(Parser::Tokenizer tk) +{ + // grammar: space* "if" space condition space* END + (void)SkipOptionalSpace(tk); + const auto keywordIf = SBuf("if"); + if (tk.skip(keywordIf) && SkipOptionalSpace(tk)) { + const auto condition = tk.remaining(); + const auto result = EvalBoolExpr(tk); + (void)SkipOptionalSpace(tk); + RejectTrailingGarbage("preprocessor condition", condition, tk); + return result; + } - return (end != str) && (*end == '\0'); // returns true if string contains nothing except number + // e.g., "iffy_error_responses on" + return std::nullopt; } +/// interprets input as an `else` or `endif` line of a preprocessor `if` statement +/// \returns false if input does not look like an `else` or `endif` line static bool -EvalBoolExpr(const char* expr) +IsIfStatementLine(const SBuf &keyword, Parser::Tokenizer tk) { - assert(expr != nullptr); - if (strcmp(expr, "true") == 0) { - return true; - } else if (strcmp(expr, "false") == 0) { - return false; - } else if (const char* equation = strchr(expr, '=')) { - const char* rvalue = skip_ws(equation + 1); - char* lvalue = (char*)xmalloc(equation - expr + 1); - xstrncpy(lvalue, expr, equation - expr + 1); - trim_trailing_ws(lvalue); + // grammar: space* keyword space* END + (void)SkipOptionalSpace(tk); + if (tk.skip(keyword)) { + if (tk.atEnd()) + return true; + + if (SkipOptionalSpace(tk)) { + RejectTrailingGarbage("preprocessor keyword", keyword, tk); + return true; + } + // e.g., "elseif" + } - long number1; - if (!StrToInt(lvalue, number1)) - fatalf("String is not a integer number: '%s'\n", lvalue); - long number2; - if (!StrToInt(rvalue, number2)) - fatalf("String is not a integer number: '%s'\n", rvalue); + return false; +} - xfree(lvalue); - return number1 == number2; +/// interprets input as an `include ` preprocessor directive +/// \returns std::nullopt if input does not look like an `include` statement +/// \returns `include` parameters if input is an `include` statement +static std::optional +IsIncludeLine(Parser::Tokenizer tk) +{ + // grammar: space* "include" space files space* END + (void)SkipOptionalSpace(tk); + const auto keywordInclude = SBuf("include"); + if (tk.skip(keywordInclude) && SkipOptionalSpace(tk)) { + // for simplicity sake, we leave trailing space, if any, in the result + return tk.remaining(); } - fatalf("Unable to evaluate expression '%s'\n", expr); - return false; // this place cannot be reached + + // e.g., "include_version_info allow all" + return std::nullopt; } static int @@ -453,8 +534,6 @@ parseOneConfigFile(const char *file_name, unsigned int depth) const char *orig_cfg_filename = cfg_filename; const int orig_config_lineno = config_lineno; char *token = nullptr; - char *tmp_line = nullptr; - int tmp_line_len = 0; int err_count = 0; int is_pipe = 0; @@ -486,6 +565,9 @@ parseOneConfigFile(const char *file_name, unsigned int depth) config_lineno = 0; + // sequential raw input lines merged to honor line continuation markers + SBuf wholeLine; + std::vector if_states; while (fgets(config_input_line, BUFSIZ, fp)) { ++config_lineno; @@ -535,46 +617,44 @@ parseOneConfigFile(const char *file_name, unsigned int depth) if (config_input_line[0] == '\0') continue; - const char* append = tmp_line_len ? skip_ws(config_input_line) : config_input_line; - - size_t append_len = strlen(append); + wholeLine.append(config_input_line); - tmp_line = (char*)xrealloc(tmp_line, tmp_line_len + append_len + 1); - - strcpy(tmp_line + tmp_line_len, append); - - tmp_line_len += append_len; - - if (tmp_line[tmp_line_len-1] == '\\') { - debugs(3, 5, "parseConfigFile: tmp_line='" << tmp_line << "'"); - tmp_line[--tmp_line_len] = '\0'; + if (!wholeLine.isEmpty() && *wholeLine.rbegin() == '\\') { + debugs(3, 5, "expecting line continuation after " << wholeLine); + wholeLine.chop(0, wholeLine.length() - 1); // drop trailing backslash continue; } - trim_trailing_ws(tmp_line); - ProcessMacros(tmp_line, tmp_line_len); - debugs(3, (opt_parse_cfg_only?1:5), "Processing: " << tmp_line); + ProcessMacros(wholeLine); + auto tk = Parser::Tokenizer(wholeLine); - if (const char* expr = FindStatement(tmp_line, "if")) { - if_states.push_back(EvalBoolExpr(expr)); // store last if-statement meaning - } else if (FindStatement(tmp_line, "endif")) { + // (void)tk.skipAll(CharacterSet::WSP) is not necessary due to earlier skip_ws() + (void)tk.skipAllTrailing(CharacterSet::WSP); + + debugs(3, (opt_parse_cfg_only?1:5), "Processing: " << tk.remaining()); + + static const auto keywordElse = SBuf("else"); + static const auto keywordEndif = SBuf("endif"); + if (const auto condition = IsIfStatementOpening(tk)) { + if_states.push_back(*condition); // store last if-statement meaning + } else if (IsIfStatementLine(keywordEndif, tk)) { if (!if_states.empty()) if_states.pop_back(); // remove last if-statement meaning else fatalf("'endif' without 'if'\n"); - } else if (FindStatement(tmp_line, "else")) { + } else if (IsIfStatementLine(keywordElse, tk)) { if (!if_states.empty()) if_states.back() = !if_states.back(); else fatalf("'else' without 'if'\n"); } else if (if_states.empty() || if_states.back()) { // test last if-statement meaning if present /* Handle includes here */ - if (tmp_line_len >= 9 && strncmp(tmp_line, "include", 7) == 0 && IsSpace(tmp_line[7])) { - err_count += parseManyConfigFiles(tmp_line + 8, depth + 1); + if (const auto files = IsIncludeLine(tk)) { + err_count += parseIncludedConfigFiles(*files, depth + 1); } else { try { - if (!parse_line(tmp_line)) { - debugs(3, DBG_CRITICAL, "ERROR: unrecognized directive near '" << tmp_line << "'" << + if (!parse_line(wholeLine)) { + debugs(3, DBG_CRITICAL, "ERROR: unrecognized directive near '" << wholeLine << "'" << Debug::Extra << "directive location: " << ConfigParser::CurrentLocation()); ++err_count; } @@ -586,9 +666,7 @@ parseOneConfigFile(const char *file_name, unsigned int depth) } } - safe_free(tmp_line); - tmp_line_len = 0; - + wholeLine.clear(); } if (!if_states.empty()) fatalf("if-statement without 'endif'\n"); @@ -605,7 +683,6 @@ parseOneConfigFile(const char *file_name, unsigned int depth) SetConfigFilename(orig_cfg_filename, false); config_lineno = orig_config_lineno; - xfree(tmp_line); return err_count; } @@ -669,10 +746,10 @@ ParseDirective(T &raw, ConfigParser &parser) if (SawDirective(raw)) parser.rejectDuplicateDirective(); - // TODO: parser.openDirective(directiveName); Must(!raw); raw = Configuration::Component::Parse(parser); Must(raw); + // TODO: Move to parse_line() when ready to reject trailing garbage in all directives. parser.closeDirective(); } @@ -4383,20 +4460,20 @@ sslBumpCfgRr::finalizeConfig() { if (lastDeprecatedRule != Ssl::bumpEnd) { assert( lastDeprecatedRule == Ssl::bumpClientFirst || lastDeprecatedRule == Ssl::bumpNone); - static char buf[1024]; + SBuf conversionRule; if (lastDeprecatedRule == Ssl::bumpClientFirst) { - strcpy(buf, "ssl_bump deny all"); + conversionRule = SBuf("ssl_bump deny all"); debugs(3, DBG_CRITICAL, "WARNING: auto-converting deprecated implicit " "\"ssl_bump deny all\" to \"ssl_bump none all\". New ssl_bump configurations " "must not use implicit rules. Update your ssl_bump rules."); } else { - strcpy(buf, "ssl_bump allow all"); + conversionRule = SBuf("ssl_bump allow all"); debugs(3, DBG_CRITICAL, "SECURITY NOTICE: auto-converting deprecated implicit " "\"ssl_bump allow all\" to \"ssl_bump client-first all\" which is usually " "inferior to the newer server-first bumping mode. New ssl_bump" " configurations must not use implicit rules. Update your ssl_bump rules."); } - parse_line(buf); + parse_line(conversionRule); } } diff --git a/src/cf_gen.cc b/src/cf_gen.cc index 2294668b68f..b7b8ad90351 100644 --- a/src/cf_gen.cc +++ b/src/cf_gen.cc @@ -466,13 +466,11 @@ gen_default(const EntryList &head, std::ostream &fout) fout << "static void" << std::endl << "default_line(const char *s)" << std::endl << "{" << std::endl << - " char *tmp_line = xstrdup(s);" << std::endl << - " int len = strlen(tmp_line);" << std::endl << - " ProcessMacros(tmp_line, len);" << std::endl << - " xstrncpy(config_input_line, tmp_line, sizeof(config_input_line));" << std::endl << + " SBuf tmp_line(s);" << std::endl << + " ProcessMacros(tmp_line);" << std::endl << + " xstrncpy(config_input_line, tmp_line.c_str(), sizeof(config_input_line));" << std::endl << " config_lineno++;" << std::endl << " parse_line(tmp_line);" << std::endl << - " xfree(tmp_line);" << std::endl << "}" << std::endl << std::endl; fout << "static void" << std::endl << "default_all(void)" << std::endl << @@ -595,7 +593,7 @@ gen_default_postscriptum(const EntryList &head, std::ostream &fout) void Entry::genParseAlias(const std::string &aName, std::ostream &fout) const { - fout << " if (!strcmp(token, \"" << aName << "\")) {" << std::endl; + fout << " if (directiveName.cmp(\"" << aName << "\") == 0) {" << std::endl; if (ifdef.size()) fout << "#if " << ifdef << std::endl; fout << " cfg_directive = \"" << aName << "\";" << std::endl; @@ -606,7 +604,7 @@ Entry::genParseAlias(const std::string &aName, std::ostream &fout) const // offset line to strip initial whitespace tab byte fout << " debugs(0, DBG_PARSE_NOTE(DBG_IMPORTANT), \"" << aName << " : " << &l[1] << "\");" << std::endl; } - fout << " parse_obsolete(token);"; + fout << " parse_obsolete(cfg_directive);"; } else if (!loc.size() || loc.compare("none") == 0) { fout << "parse_" << type << "();"; } else if (type.find("::") != std::string::npos) { @@ -646,12 +644,9 @@ gen_parse(const EntryList &head, std::ostream &fout) { fout << "static int\n" - "parse_line(char *buff)\n" + "parse_line(const SBuf &buf)\n" "{\n" - "\tchar\t*token;\n" - "\tif ((token = strtok(buff, \" \\t\")) == NULL) \n" - "\t\treturn 1;\t/* ignore empty lines */\n" - "\tConfigParser::SetCfgLine(strtok(nullptr, \"\"));\n"; + "\tconst auto directiveName = LegacyParser.openDirective(buf);\n"; for (const auto &e : head) e.genParse(fout); diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc index 6544fb44823..7ffd52a920a 100644 --- a/src/parser/Tokenizer.cc +++ b/src/parser/Tokenizer.cc @@ -332,3 +332,60 @@ Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit) return result; } +/// NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem() helper that +/// parses an optional "quoted string" sequence at the start of its input. +static std::optional +UnquoteSequence_(Parser::Tokenizer &tk) +{ + if (!tk.skipOne(CharacterSet::DQUOTE)) + return std::nullopt; + + SBuf decoded; + while (!tk.atEnd()) { + SBuf prefix; + static const auto backslashChar = CharacterSet("backslash", "\\"); + static const auto ordinaryChars = (CharacterSet::DQUOTE + backslashChar).complement(); + if (tk.prefix(prefix, ordinaryChars)) + decoded.append(prefix); + + if (tk.skipOne(CharacterSet::DQUOTE)) + return decoded; // well-formed "quoted string" + + if (tk.skipOne(backslashChar)) { + SBuf escaped; + static const auto allChars = CharacterSet().complement("any char"); + if (tk.prefix(escaped, allChars, 1)) + decoded.append(escaped); + else + Assure(tk.atEnd()); // input ends at backslash (which we skip) + } + } + + return decoded; // truncated "quoted string" (no closing quote); may be empty +} + +std::optional +NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(Parser::Tokenizer &tk) +{ + const auto &spaceChars = CharacterSet::WSP; + + (void)tk.skipAll(spaceChars); + if (tk.atEnd()) + return std::nullopt; + + SBuf decoded; + do { + SBuf prefix; + static const auto ordinaryChars = (CharacterSet::DQUOTE + spaceChars).complement(); + if (tk.prefix(prefix, ordinaryChars)) + decoded.append(prefix); + + if (const auto sequence = UnquoteSequence_(tk)) + decoded.append(*sequence); + + if (tk.skipOne(spaceChars)) + break; + } while (!tk.atEnd()); + return decoded; // may be empty (e.g., ` ""` or `"\`) +} + diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h index ffa3076a512..d685ae58a79 100644 --- a/src/parser/Tokenizer.h +++ b/src/parser/Tokenizer.h @@ -12,6 +12,8 @@ #include "base/CharacterSet.h" #include "sbuf/SBuf.h" +#include + /// Generic protocol-agnostic parsing tools namespace Parser { @@ -179,5 +181,11 @@ class Tokenizer } /* namespace Parser */ +/// Extracts the next "word" from Tokenizer::remaining() input using legacy +/// strwordtok() decoding conventions. Allows upgrading strwordtok() callers to +/// SBuf while preserving their "rudimentary" parsing grammar/logic. +/// \returns std::nullopt when remaining input is zero or more isspace(3) characters +std::optional NextWordWhileRemovingDoubleQuotesAndBackslashesInsideThem(Parser::Tokenizer &); + #endif /* SQUID_SRC_PARSER_TOKENIZER_H */ diff --git a/src/tests/testACLMaxUserIP.cc b/src/tests/testACLMaxUserIP.cc index 2c856c21969..be3069bcf57 100644 --- a/src/tests/testACLMaxUserIP.cc +++ b/src/tests/testACLMaxUserIP.cc @@ -71,10 +71,8 @@ MyTestProgram::startup() void TestACLMaxUserIP::testParseLine() { - /* a config line to pass with a lead-in token to seed the parser. */ - char * line = xstrdup("test max_user_ip -s 1"); /* seed the parser */ - ConfigParser::SetCfgLine(line); + ConfigParser::SetCfgLine(SBuf("test max_user_ip -s 1")); ConfigParser LegacyParser; Acl::Node::ParseNamedAcl(LegacyParser, Config.namedAcls); CPPUNIT_ASSERT(Config.namedAcls); @@ -90,7 +88,6 @@ TestACLMaxUserIP::testParseLine() CPPUNIT_ASSERT_EQUAL(true, maxUserIpACL->valid()); } Acl::FreeNamedAcls(&Config.namedAcls); - xfree(line); } int diff --git a/src/tests/testConfigParser.cc b/src/tests/testConfigParser.cc index 3a8e1a63a67..34085d4cb8e 100644 --- a/src/tests/testConfigParser.cc +++ b/src/tests/testConfigParser.cc @@ -9,6 +9,7 @@ #include "squid.h" #include "compat/cppunit.h" #include "ConfigParser.h" +#include "sbuf/SBuf.h" #include "SquidString.h" #include "unitTestMain.h" @@ -31,18 +32,11 @@ CPPUNIT_TEST_SUITE_REGISTRATION( TestConfigParser ); int shutting_down = 0; -bool TestConfigParser::doParseQuotedTest(const char *s, const char *expectInterp) +bool +TestConfigParser::doParseQuotedTest(const char * const cfgparam, const char * const expectInterp) { - char cfgline[2048]; - char cfgparam[2048]; - snprintf(cfgline, 2048, "%s", s); - - // Keep the initial value on cfgparam. The ConfigParser methods will write on cfgline - strncpy(cfgparam, cfgline, sizeof(cfgparam)-1); - cfgparam[sizeof(cfgparam)-1] = '\0'; - // Initialize parser to point to the start of quoted string - ConfigParser::SetCfgLine(cfgline); + ConfigParser::SetCfgLine(SBuf(cfgparam)); String unEscaped = ConfigParser::NextToken(); const bool interpOk = (unEscaped.cmp(expectInterp) == 0); diff --git a/src/tests/testRock.cc b/src/tests/testRock.cc index bb89e8c5296..d212baf3393 100644 --- a/src/tests/testRock.cc +++ b/src/tests/testRock.cc @@ -91,17 +91,13 @@ TestRock::setUp() char *path=xstrdup(TESTDIR); - char *config_line=xstrdup("10 max-size=16384"); - - ConfigParser::SetCfgLine(config_line); + ConfigParser::SetCfgLine(SBuf("10 max-size=16384")); store->parse(0, path); store_maxobjsize = 1024*1024*2; safe_free(path); - safe_free(config_line); - /* ok, ready to create */ store->create(); diff --git a/src/tests/testUfs.cc b/src/tests/testUfs.cc index 5daec5e5b99..a2b1a3132d9 100644 --- a/src/tests/testUfs.cc +++ b/src/tests/testUfs.cc @@ -121,19 +121,15 @@ TestUfs::testUfsSearch() char *path=xstrdup(TESTDIR); - char *config_line=xstrdup("100 1 1"); - visible_appname_string = xstrdup(PACKAGE "/" VERSION); - ConfigParser::SetCfgLine(config_line); + ConfigParser::SetCfgLine(SBuf("100 1 1")); aStore->parse(0, path); store_maxobjsize = 1024*1024*2; safe_free(path); - safe_free(config_line); - /* ok, ready to create */ aStore->create(); @@ -240,11 +236,9 @@ TestUfs::testUfsDefaultEngine() mem_policy = createRemovalPolicy(Config.replPolicy); char *path=xstrdup(TESTDIR); - char *config_line=xstrdup("100 1 1"); - ConfigParser::SetCfgLine(config_line); + ConfigParser::SetCfgLine(SBuf("100 1 1")); aStore->parse(0, path); safe_free(path); - safe_free(config_line); CPPUNIT_ASSERT(aStore->IO->io != nullptr); free_cachedir(&Config.cacheSwap); diff --git a/test-suite/squidconf/pp-ifs.conf b/test-suite/squidconf/pp-ifs.conf new file mode 100644 index 00000000000..2d3ddaf8be0 --- /dev/null +++ b/test-suite/squidconf/pp-ifs.conf @@ -0,0 +1,27 @@ +## Copyright (C) 1996-2023 The Squid Software Foundation and contributors +## +## Squid software is distributed under GPLv2+ license and includes +## contributions from numerous individuals and organizations. +## Please see the COPYING and CONTRIBUTORS files for details. +## + +if true + if 10 = 0xA + if ${process_number} = 9223372036854775807 + else + if 0=000 + if -0= +0 + # if -1=+1 + if false + unreachable_directive + else + if preprocessor rejects this particular malformed line, then the test passes + endif + # endif + endif + endif + endif + else + if this unreachable statement was reached then preprocessor failed + endif +endif diff --git a/test-suite/squidconf/pp-ifs.conf.instructions b/test-suite/squidconf/pp-ifs.conf.instructions new file mode 100644 index 00000000000..bf16ed5ad87 --- /dev/null +++ b/test-suite/squidconf/pp-ifs.conf.instructions @@ -0,0 +1 @@ +expect-failure FATAL:.*particular.malformed.line