From 02d0af26a21525db54fdb6ee8b009fd13022ad90 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Mon, 9 Nov 2020 11:19:06 +0300 Subject: [PATCH] Nimgrep improvements 2 (#15612) * nimgrep: speed up by threads and Channels * nimgrep: add --bin, --text, --count options * nimgrep: add --sortTime option * allow Peg in all matches including --includeFile, --excludeFile, --excludeDir * add --match and --noMatch options * add --includeDir option * add --limit (-m) and --onlyAscii (-o) options * fix performance regression introduced in nimgrep improvements #12779 * better error handling * add option --fit * fix groups in --replace * fix flushing, --replace, improve --count * use "." as the default directory, not full path * fix --fit for Windows * force target to C for macosx * validate non-negative int input for options #15318 * switch nimgrep to using --gc:orc * address review: implement cropping in matches,... * implement stdin/pipe & revise --help * address stylistic review & add limitations --- testament/categories.nim | 3 + tools/nimgrep.nim | 1334 +++++++++++++++++++++++++++++--------- tools/nimgrep.nim.cfg | 9 +- 3 files changed, 1049 insertions(+), 297 deletions(-) diff --git a/testament/categories.nim b/testament/categories.nim index 07d35f6fb04c1..fa9391055b270 100644 --- a/testament/categories.nim +++ b/testament/categories.nim @@ -262,6 +262,9 @@ proc debuggerTests(r: var TResults, cat: Category, options: string) = if fileExists("tools/nimgrep.nim"): var t = makeTest("tools/nimgrep", options & " --debugger:on", cat) t.spec.action = actionCompile + # force target to C because of MacOS 10.15 SDK headers bug + # https://github.com/nim-lang/Nim/pull/15612#issuecomment-712471879 + t.spec.targets = { targetC } testSpec r, t # ------------------------- JS tests ------------------------------------------ diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 138f1680c1f75..1e563435c2989 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -8,86 +8,247 @@ # import - os, strutils, parseopt, pegs, re, terminal + os, strutils, parseopt, pegs, re, terminal, osproc, tables, algorithm, times const - Version = "1.5" - Usage = "nimgrep - Nim Grep Utility Version " & Version & """ + Version = "1.6.0" + Usage = "nimgrep - Nim Grep Searching and Replacement Utility Version " & + Version & """ + + (c) 2012-2020 Andreas Rumpf - (c) 2012 Andreas Rumpf Usage: - nimgrep [options] [pattern] [replacement] (file/directory)* +* To search: + nimgrep [options] PATTERN [(FILE/DIRECTORY)*/-] +* To replace: + nimgrep [options] PATTERN --replace REPLACEMENT (FILE/DIRECTORY)*/- +* To list file names: + nimgrep [options] --filenames [PATTERN] [(FILE/DIRECTORY)*] + +Positional arguments, from left to right: +* PATERN is either Regex (default) or Peg if --peg is specified. + PATTERN and REPLACEMENT should be skipped when --stdin is specified. +* REPLACEMENT supports $1, $# notations for captured groups in PATTERN. + Note: --replace mode DOES NOT ask confirmation unless --confirm is specified! +* Final arguments are a list of paths (FILE/DIRECTORY) or a standalone + minus '-' (pipe) or not specified (empty). Note for the empty case: when + no FILE/DIRECTORY/- is specified nimgrep DOES NOT read the pipe, but + searches files in the current dir instead! + - read buffer once from stdin: pipe or terminal input; + in --replace mode the result is directed to stdout; + it's not compatible with --stdin, --filenames, --confirm + (empty) current directory '.' is assumed (not with --replace) + For any given DIRECTORY nimgrep searches only its immediate files without + traversing sub-directories unless --recursive is specified. + In replacement mode all 3 positional arguments are required to avoid damaging. + Options: - --find, -f find the pattern (default) - --replace, -! replace the pattern - --peg pattern is a peg - --re pattern is a regular expression (default) +* Mode of operation: + --find, -f find the PATTERN (default) + --replace, -! replace the PATTERN to REPLACEMENT, rewriting the files + --confirm confirm each occurrence/replacement; there is a chance + to abort any time without touching the file + --filenames just list filenames. Provide a PATTERN to find it in + the filenames (not in the contents of a file) or run + with empty pattern to just list all files: + nimgrep --filenames # In current directory + nimgrep --filenames "" DIRECTORY # Note empty pattern "" + +* Interprete patterns: + --peg PATTERN and PAT are Peg + --re PATTERN and PAT are regular expressions (default) --rex, -x use the "extended" syntax for the regular expression so that whitespace is not significant + --word, -w matches should have word boundaries (buggy for pegs!) + --ignoreCase, -i be case insensitive in PATTERN and PAT + --ignoreStyle, -y be style insensitive in PATTERN and PAT + NOTE: PATERN and patterns PAT (see below in other options) are all either + Regex or Peg simultaneously and options --rex, --word, --ignoreCase, + --ignoreStyle are applied to all of them. + +* File system walk: --recursive, -r process directories recursively --follow follow all symlinks when processing recursively - --confirm confirm each occurrence/replacement; there is a chance - to abort any time without touching the file - --stdin read pattern from stdin (to avoid the shell's confusing - quoting rules) - --word, -w the match should have word boundaries (buggy for pegs!) - --ignoreCase, -i be case insensitive - --ignoreStyle, -y be style insensitive --ext:EX1|EX2|... only search the files with the given extension(s), empty one ("--ext") means files with missing extension --noExt:EX1|... exclude files having given extension(s), use empty one to skip files with no extension (like some binary files are) - --includeFile:PAT include only files whose names match the given regex PAT - --excludeFile:PAT skip files whose names match the given regex pattern PAT - --excludeDir:PAT skip directories whose names match the given regex PAT - --nocolor output will be given without any colours - --color[:always] force color even if output is redirected + --includeFile:PAT search only files whose names contain pattern PAT + --excludeFile:PAT skip files whose names contain pattern PAT + --includeDir:PAT search only files with whole directory path containing PAT + --excludeDir:PAT skip directories whose name (not path) contain pattern PAT + --if,--ef,--id,--ed abbreviations of 4 options above + --sortTime order files by the last modification time (default: off): + -s[:asc|desc] ascending (recent files go last) or descending + +* Filter file content: + --match:PAT select files containing a (not displayed) match of PAT + --noMatch:PAT select files not containing any match of PAT + --bin:on|off|only process binary files? (detected by \0 in first 1K bytes) + (default: on - binary and text files treated the same way) + --text, -t process only text files, the same as --bin:off + +* Represent results: + --nocolor output will be given without any colors + --color[:on] force color even if output is redirected (default: auto) --colorTheme:THEME select color THEME from 'simple' (default), 'bnw' (black and white) ,'ack', or 'gnu' (GNU grep) + --count only print counts of matches for files that matched + --context:N, -c:N print N lines of leading context before every match and + N lines of trailing context after it (default N: 0) --afterContext:N, -a:N print N lines of trailing context after every match --beforeContext:N, -b:N print N lines of leading context before every match - --context:N, -c:N print N lines of leading context before every match and - N lines of trailing context after it --group, -g group matches by file --newLine, -l display every matching line starting from a new line + --cols[:N] limit max displayed columns/width of output lines from + files by N characters, cropping overflows (default: off) + --cols:auto, -% calculate columns from terminal width for every line + --onlyAscii, -@ display only printable ASCII Latin characters 0x20-0x7E + substitutions: 0 -> ^@, 1 -> ^A, ... 0x1F -> ^_, + 0x7F -> '7F, ..., 0xFF -> 'FF +* Miscellaneous: + --threads:N, -j:N speed up search by N additional workers (default: 0, off) + --stdin read PATTERN from stdin (to avoid the shell's confusing + quoting rules) and, if --replace given, REPLACEMENT --verbose be verbose: list every processed file - --filenames find the pattern in the filenames, not in the contents - of the file --help, -h shows this help --version, -v shows the version """ +# Limitations / ideas / TODO: +# * No unicode support with --cols +# * Consider making --onlyAscii default, since dumping binary data has +# stability and security repercussions +# * Mode - reads entire buffer by whole from stdin, which is bad for streaming. +# To implement line-by-line reading after adding option to turn off +# multiline matches +# * Add some form of file pre-processing, e.g. feed binary files to utility +# `strings` and then do the search inside these strings +# * Add --showCol option to also show column (of match), not just line; it +# makes it easier when jump to line+col in an editor or on terminal + + +# Search results for a file are modelled by these levels: +# FileResult -> Block -> Output/Chunk -> SubLine +# +# 1. SubLine is an entire line or its part. +# +# 2. Chunk, which is a sequence of SubLine, represents a match and its +# surrounding context. +# Output is a Chunk or one of auxiliary results like an openError. +# +# 3. Block, which is a sequence of Chunks, is not present as a separate type. +# It will just be separated from another Block by newline when there is +# more than 3 lines in it. +# Here is an example of a Block where only 1 match is found and +# 1 line before and 1 line after of context are required: +# +# ...a_line_before...................................... <<\n") - elif newLine: + elif newLine and optFilenames notin options and optPipe notin options: if oneline: printBlockFile(filename) printBlockLineN(":" & $line & ":") @@ -231,234 +405,434 @@ proc blockHeader(filename: string, line: int|string, replMode=false) = printBlockLineN($line.`$`.align(alignment) & ":") stdout.write("\n") -proc lineHeader(filename: string, line: int|string, isMatch: bool) = +proc newLn(curCol: var Column) = + stdout.write("\n") + curCol.file = 0 + curCol.terminal = 0 + +# We reserve 10+3 chars on the right in --cols mode (optLimitChars). +# If the current match touches this right margin, subLine before it will +# be cropped (even if space is enough for subLine after the match — we +# currently don't have a way to know it since we get it afterwards). +const matchPaddingFromRight = 10 +const ellipsis = "..." + +proc lineHeader(filename: string, line: int|string, isMatch: bool, + curCol: var Column) = let lineSym = if isMatch: $line & ":" else: $line & " " - if not newLine: + if not newLine and optFilenames notin options and optPipe notin options: if oneline: printFile(filename) printLineN(":" & lineSym, isMatch) + curcol.terminal += filename.len + 1 + lineSym.len else: printLineN(lineSym.align(alignment+1), isMatch) - stdout.write(" ") + curcol.terminal += lineSym.align(alignment+1).len + stdout.write(" "); curCol.terminal += 1 + curCol.terminal = curCol.terminal mod termWidth + if optLimitChars in options and + curCol.terminal > limitCharUsr - matchPaddingFromRight - ellipsis.len: + newLn(curCol) + +proc reserveChars(mi: MatchInfo): int = + if optLimitChars in options: + let patternChars = afterPattern(mi.match, 0) + 1 + result = patternChars + ellipsis.len + matchPaddingFromRight + else: + result = 0 + +# Our substitutions of non-printable symbol to ASCII character are similar to +# those of programm 'less'. +const lowestAscii = 0x20 # lowest ASCII Latin printable symbol (@) +const largestAscii = 0x7e +const by2ascii = 2 # number of ASCII chars to represent chars < lowestAscii +const by3ascii = 3 # number of ASCII chars to represent chars > largestAscii + +proc printExpanded(s: string, curCol: var Column, isMatch: bool, + limitChar: int) = + # Print taking into account tabs and optOnlyAscii (and also optLimitChar: + # the proc called from printCropped but we need to check column < limitChar + # also here, since exact cut points are known only after tab expansion). + # With optOnlyAscii non-ascii chars are highlighted even in matches. + # + # use buffer because: + # 1) we need to print non-ascii character inside matches while keeping the + # amount of color escape sequences minimal. + # 2) there is a report that fwrite buffering is slow on MacOS + # https://github.com/nim-lang/Nim/pull/15612#discussion_r510538326 + const bufSize = 8192 # typical for fwrite too + var buffer: string + const normal = 0 + const special = 1 + var lastAdded = normal + template dumpBuf() = + if lastAdded == normal: + printContents(buffer, isMatch) + else: + printSpecial(buffer) + template addBuf(i: int, s: char|string, size: int) = + if lastAdded != i or buffer.len + size > bufSize: + dumpBuf() + buffer.setlen(0) + buffer.add s + lastAdded = i + for c in s: + let charsAllowed = limitChar - curCol.terminal + if charsAllowed <= 0: + break + if lowestAscii <= int(c) and int(c) <= largestAscii: # ASCII latin + addBuf(normal, c, 1) + curCol.file += 1; curCol.terminal += 1 + elif (not optOnlyAscii) and c != '\t': # the same, print raw + addBuf(normal, c, 1) + curCol.file += 1; curCol.terminal += 1 + elif c == '\t': + let spaces = 8 - (curCol.file mod 8) + let spacesAllowed = min(spaces, charsAllowed) + curCol.file += spaces + curCol.terminal += spacesAllowed + if expandTabs: + if optOnlyAscii: # print a nice box for tab + addBuf(special, " ", 1) + addBuf(normal, " ".repeat(spacesAllowed-1), spacesAllowed-1) + else: + addBuf(normal, " ".repeat(spacesAllowed), spacesAllowed) + else: + addBuf(normal, '\t', 1) + else: # substitute characters that are not ACSII Latin + if int(c) < lowestAscii: + let substitute = char(int(c) + 0x40) # use common "control codes" + addBuf(special, "^" & substitute, by2ascii) + curCol.terminal += by2ascii + else: # int(c) > largestAscii + curCol.terminal += by3ascii + let substitute = '\'' & c.BiggestUInt.toHex(2) + addBuf(special, substitute, by3ascii) + curCol.file += 1 + if buffer.len > 0: + dumpBuf() + +template nextCharacter(c: char, file: var int, term: var int) = + if lowestAscii <= int(c) and int(c) <= largestAscii: # ASCII latin + file += 1 + term += 1 + elif (not optOnlyAscii) and c != '\t': # the same, print raw + file += 1 + term += 1 + elif c == '\t': + term += 8 - (file mod 8) + file += 8 - (file mod 8) + elif int(c) < lowestAscii: + file += 1 + term += by2ascii + else: # int(c) > largestAscii: + file += 1 + term += by3ascii + +proc calcTermLen(s: string, firstCol: int, chars: int, fromLeft: bool): int = + # calculate additional length added by Tabs expansion and substitutions + var col = firstCol + var first, last: int + if fromLeft: + first = max(0, s.len - chars) + last = s.len - 1 + else: + first = 0 + last = min(s.len - 1, chars - 1) + for c in s[first .. last]: + nextCharacter(c, col, result) + +proc printCropped(s: string, curCol: var Column, fromLeft: bool, + limitChar: int, isMatch = false) = + # print line `s`, may be cropped if option --cols was set + const eL = ellipsis.len + if optLimitChars notin options: + if not expandTabs and not optOnlyAscii: # for speed mostly + printContents(s, isMatch) + else: + printExpanded(s, curCol, isMatch, limitChar) + else: # limit columns, expand Tabs is also forced + var charsAllowed = limitChar - curCol.terminal + if fromLeft and charsAllowed < eL: + charsAllowed = eL + if (not fromLeft) and charsAllowed <= 0: + # already overflown and ellipsis shold be in place + return + let fullLenWithin = calcTermLen(s, curCol.file, charsAllowed, fromLeft) + # additional length from Tabs and special symbols + let addLen = fullLenWithin - min(s.len, charsAllowed) + # determine that the string is guaranteed to fit within `charsAllowed` + let fits = + if s.len > charsAllowed: + false + else: + if isMatch: fullLenWithin <= charsAllowed - eL + else: fullLenWithin <= charsAllowed + if fits: + printExpanded(s, curCol, isMatch, limitChar = high(int)) + else: + if fromLeft: + printBold ellipsis + curCol.terminal += eL + # find position `pos` where the right side of line will fit charsAllowed + var col = 0 + var term = 0 + var pos = min(s.len, max(0, s.len - (charsAllowed - eL))) + while pos <= s.len - 1: + let c = s[pos] + nextCharacter(c, col, term) + if term >= addLen: + break + inc pos + curCol.file = pos + # TODO don't expand tabs when cropped from the left - difficult, meaningless + printExpanded(s[pos .. s.len - 1], curCol, isMatch, + limitChar = high(int)) + else: + let last = max(-1, min(s.len - 1, charsAllowed - eL - 1)) + printExpanded(s[0 .. last], curCol, isMatch, limitChar-eL) + let numDots = limitChar - curCol.terminal + printBold ".".repeat(numDots) + curCol.terminal = limitChar -proc printMatch(fileName: string, mi: MatchInfo) = - let lines = mi.match.splitLines() - for i, l in lines: +proc printMatch(fileName: string, mi: MatchInfo, curCol: var Column) = + let sLines = mi.match.splitLines() + for i, l in sLines: if i > 0: - lineHeader(filename, mi.lineBeg + i, isMatch = true) - writeColored(l) - if i < lines.len - 1: - stdout.write("\n") + lineHeader(filename, mi.lineBeg + i, isMatch = true, curCol) + let charsAllowed = limitCharUsr - curCol.terminal + if charsAllowed > 0: + printCropped(l, curCol, fromLeft = false, limitCharUsr, isMatch = true) + else: + curCol.overflowMatches += 1 + if i < sLines.len - 1: + newLn(curCol) + +proc getSubLinesBefore(buf: string, curMi: MatchInfo): string = + let first = beforePattern(buf, curMi.first-1, linesBefore+1) + result = substr(buf, first, curMi.first-1) -proc printLinesBefore(si: SearchInfo, curMi: MatchInfo, nLines: int, - replMode=false) = +proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, + curCol: var Column, reserveChars: int, + replMode=false) = # start block: print 'linesBefore' lines before current match `curMi` - let first = beforePattern(si.buf, curMi.first-1, nLines) - let lines = splitLines(substr(si.buf, first, curMi.first-1)) - let startLine = curMi.lineBeg - lines.len + 1 - blockHeader(si.filename, curMi.lineBeg, replMode=replMode) - for i, l in lines: - lineHeader(si.filename, startLine + i, isMatch = (i == lines.len - 1)) - stdout.write(l) - if i < lines.len - 1: - stdout.write("\n") + let sLines = splitLines(beforeMatch) + let startLine = lineBeg - sLines.len + 1 + blockHeader(filename, lineBeg, replMode=replMode) + for i, l in sLines: + let isLastLine = i == sLines.len - 1 + lineHeader(filename, startLine + i, isMatch = isLastLine, curCol) + let limit = if isLastLine: limitCharUsr - reserveChars else: limitCharUsr + l.printCropped(curCol, fromLeft = isLastLine, limitChar = limit) + if not isLastLine: + newLn(curCol) + +proc getSubLinesAfter(buf: string, mi: MatchInfo): string = + let last = afterPattern(buf, mi.last+1, 1+linesAfter) + let skipByte = # workaround posix: suppress extra line at the end of file + if (last == buf.len-1 and buf.len >= 2 and + buf[^1] == '\l' and buf[^2] != '\c'): 1 + else: 0 + result = substr(buf, mi.last+1, last - skipByte) + +proc printOverflow(filename: string, line: int, curCol: var Column) = + if curCol.overflowMatches > 0: + lineHeader(filename, line, isMatch = true, curCol) + printBold("(" & $curCol.overflowMatches & " matches skipped)") + newLn(curCol) + curCol.overflowMatches = 0 -proc printLinesAfter(si: SearchInfo, mi: MatchInfo, nLines: int) = +proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, + curCol: var Column) = # finish block: print 'linesAfter' lines after match `mi` - let s = si.buf - let last = afterPattern(s, mi.last+1, nLines) - let lines = splitLines(substr(s, mi.last+1, last)) - if lines.len == 0: # EOF - stdout.write("\n") + let sLines = splitLines(afterMatch) + if sLines.len == 0: # EOF + newLn(curCol) else: - stdout.write(lines[0]) # complete the line after match itself - stdout.write("\n") - let skipLine = # workaround posix line ending at the end of file - if last == s.len-1 and s.len >= 2 and s[^1] == '\l' and s[^2] != '\c': 1 - else: 0 - for i in 1 ..< lines.len - skipLine: - lineHeader(si.filename, mi.lineEnd + i, isMatch = false) - stdout.write(lines[i]) - stdout.write("\n") - if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") + sLines[0].printCropped(curCol, fromLeft = false, limitCharUsr) + # complete the line after the match itself + newLn(curCol) + printOverflow(filename, matchLineEnd, curCol) + for i in 1 ..< sLines.len: + lineHeader(filename, matchLineEnd + i, isMatch = false, curCol) + sLines[i].printCropped(curCol, fromLeft = false, limitCharUsr) + newLn(curCol) -proc printBetweenMatches(si: SearchInfo, prevMi: MatchInfo, curMi: MatchInfo) = +proc getSubLinesBetween(buf: string, prevMi: MatchInfo, + curMi: MatchInfo): string = + buf.substr(prevMi.last+1, curMi.first-1) + +proc printBetweenMatches(filename: string, betweenMatches: string, + lastLineBeg: int, + curCol: var Column, reserveChars: int) = # continue block: print between `prevMi` and `curMi` - let lines = si.buf.substr(prevMi.last+1, curMi.first-1).splitLines() - stdout.write(lines[0]) # finish the line of previous Match - if lines.len > 1: - stdout.write("\n") - for i in 1 ..< lines.len: - lineHeader(si.filename, prevMi.lineEnd + i, - isMatch = (i == lines.len - 1)) - stdout.write(lines[i]) - if i < lines.len - 1: - stdout.write("\n") - -proc printContextBetween(si: SearchInfo, prevMi, curMi: MatchInfo) = - # print context after previous match prevMi and before current match curMi - let nLinesBetween = curMi.lineBeg - prevMi.lineEnd - if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block - printBetweenMatches(si, prevMi, curMi) - else: # finalize previous block and then print next block - printLinesAfter(si, prevMi, 1+linesAfter) - printLinesBefore(si, curMi, linesBefore+1) - -proc printReplacement(si: SearchInfo, mi: MatchInfo, repl: string, - showRepl: bool, curPos: int, + let sLines = betweenMatches.splitLines() + sLines[0].printCropped(curCol, fromLeft = false, limitCharUsr) + # finish the line of previous Match + if sLines.len > 1: + newLn(curCol) + printOverflow(filename, lastLineBeg - sLines.len + 1, curCol) + for i in 1 ..< sLines.len: + let isLastLine = i == sLines.len - 1 + lineHeader(filename, lastLineBeg - sLines.len + i + 1, + isMatch = isLastLine, curCol) + let limit = if isLastLine: limitCharUsr - reserveChars else: limitCharUsr + sLines[i].printCropped(curCol, fromLeft = isLastLine, limitChar = limit) + if not isLastLine: + newLn(curCol) + +proc printReplacement(fileName: string, buf: string, mi: MatchInfo, + repl: string, showRepl: bool, curPos: int, newBuf: string, curLine: int) = - printLinesBefore(si, mi, linesBefore+1) - printMatch(si.fileName, mi) - printLinesAfter(si, mi, 1+linesAfter) + var curCol: Column + printSubLinesBefore(fileName, getSubLinesBefore(buf, mi), mi.lineBeg, + curCol, reserveChars(mi)) + printMatch(fileName, mi, curCol) + printSubLinesAfter(fileName, getSubLinesAfter(buf, mi), mi.lineEnd, curCol) stdout.flushFile() if showRepl: - let newSi: SearchInfo = (buf: newBuf, filename: si.filename) let miForNewBuf: MatchInfo = (first: newBuf.len, last: newBuf.len, lineBeg: curLine, lineEnd: curLine, match: "") - printLinesBefore(newSi, miForNewBuf, linesBefore+1, replMode=true) + printSubLinesBefore(fileName, getSubLinesBefore(newBuf, miForNewBuf), + miForNewBuf.lineBeg, curCol, reserveChars(miForNewBuf), + replMode=true) let replLines = countLineBreaks(repl, 0, repl.len-1) let miFixLines: MatchInfo = (first: mi.first, last: mi.last, lineBeg: curLine, lineEnd: curLine + replLines, match: repl) - printMatch(si.fileName, miFixLines) - printLinesAfter(si, miFixLines, 1+linesAfter) + printMatch(fileName, miFixLines, curCol) + printSubLinesAfter(fileName, getSubLinesAfter(buf, miFixLines), + miFixLines.lineEnd, curCol) + if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") stdout.flushFile() -proc doReplace(si: SearchInfo, mi: MatchInfo, i: int, r: string; - newBuf: var string, curLine: var int, reallyReplace: var bool) = - newBuf.add(si.buf.substr(i, mi.first-1)) - inc(curLine, countLineBreaks(si.buf, i, mi.first-1)) +proc replace1match(filename: string, buf: string, mi: MatchInfo, i: int, + r: string; newBuf: var string, curLine: var int): bool = + newBuf.add(buf.substr(i, mi.first-1)) + inc(curLine, countLineBreaks(buf, i, mi.first-1)) if optConfirm in options: - printReplacement(si, mi, r, showRepl=true, i, newBuf, curLine) + printReplacement(filename, buf, mi, r, showRepl=true, i, newBuf, curLine) case confirm() of ceAbort: quit(0) - of ceYes: reallyReplace = true + of ceYes: gVar.reallyReplace = true of ceAll: - reallyReplace = true + gVar.reallyReplace = true options.excl(optConfirm) of ceNo: - reallyReplace = false + gVar.reallyReplace = false of ceNone: - reallyReplace = false + gVar.reallyReplace = false options.excl(optConfirm) - else: - printReplacement(si, mi, r, showRepl=reallyReplace, i, newBuf, curLine) - if reallyReplace: + elif optPipe notin options: + printReplacement(filename, buf, mi, r, showRepl=gVar.reallyReplace, i, + newBuf, curLine) + if gVar.reallyReplace: + result = true newBuf.add(r) inc(curLine, countLineBreaks(r, 0, r.len-1)) else: newBuf.add(mi.match) inc(curLine, countLineBreaks(mi.match, 0, mi.match.len-1)) -proc processFile(pattern; filename: string; counter: var int, errors: var int) = - var filenameShown = false - template beforeHighlight = - if not filenameShown and optVerbose notin options and not oneline: - printBlockFile(filename) - stdout.write("\n") - stdout.flushFile() - filenameShown = true +template updateCounters(output: Output) = + case output.kind + of blockFirstMatch, blockNextMatch: inc(gVar.matches) + of justCount: inc(gVar.matches, output.matches) + of openError: inc(gVar.errors) + of rejected, blockEnd, fileContents, outputFileName: discard - var buffer: string - if optFilenames in options: - buffer = filename - else: - try: - buffer = system.readFile(filename) - except IOError: - printError "Error: cannot open file: " & filename - inc(errors) - return - if optVerbose in options: - printFile(filename) - stdout.write("\n") - stdout.flushFile() - var result: string +proc printInfo(filename:string, output: Output) = + case output.kind + of openError: + printError("can not open path " & filename & " " & output.msg) + of rejected: + if optVerbose in options: + echo "(rejected: ", output.reason, ")" + of justCount: + echo " (" & $output.matches & " matches)" + of blockFirstMatch, blockNextMatch, blockEnd, fileContents, outputFileName: + discard - if optReplace in options: - result = newStringOfCap(buffer.len) +proc printOutput(filename: string, output: Output, curCol: var Column) = + case output.kind + of openError, rejected, justCount: printInfo(filename, output) + of fileContents: discard # impossible + of outputFileName: + printCropped(output.name, curCol, fromLeft=false, limitCharUsr) + newLn(curCol) + of blockFirstMatch: + printSubLinesBefore(filename, output.pre, output.match.lineBeg, + curCol, reserveChars(output.match)) + printMatch(filename, output.match, curCol) + of blockNextMatch: + printBetweenMatches(filename, output.pre, output.match.lineBeg, + curCol, reserveChars(output.match)) + printMatch(filename, output.match, curCol) + of blockEnd: + printSubLinesAfter(filename, output.blockEnding, output.firstLine, curCol) + if linesAfter + linesBefore >= 2 and not newLine and + optFilenames notin options: stdout.write("\n") - var lineRepl = 1 - let si: SearchInfo = (buf: buffer, filename: filename) +iterator searchFile(pattern: Pattern; buffer: string): Output = var prevMi, curMi: MatchInfo - curMi.lineEnd = 1 + prevMi.lineEnd = 1 var i = 0 var matches: array[0..re.MaxSubpatterns-1, string] for j in 0..high(matches): matches[j] = "" - var reallyReplace = true - while i < buffer.len: + while true: let t = findBounds(buffer, pattern, matches, i) if t.first < 0 or t.last < t.first: - if optReplace notin options and prevMi.lineBeg != 0: # finalize last match - printLinesAfter(si, prevMi, 1+linesAfter) - stdout.flushFile() + if prevMi.lineBeg != 0: # finalize last match + yield Output(kind: blockEnd, + blockEnding: getSubLinesAfter(buffer, prevMi), + firstLine: prevMi.lineEnd) break - let lineBeg = curMi.lineEnd + countLineBreaks(buffer, i, t.first-1) + let lineBeg = prevMi.lineEnd + countLineBreaks(buffer, i, t.first-1) curMi = (first: t.first, last: t.last, lineBeg: lineBeg, lineEnd: lineBeg + countLineBreaks(buffer, t.first, t.last), match: buffer.substr(t.first, t.last)) - beforeHighlight() - inc counter - if optReplace notin options: - if prevMi.lineBeg == 0: # no previous match, so no previous block to finalize - printLinesBefore(si, curMi, linesBefore+1) - else: - printContextBetween(si, prevMi, curMi) - printMatch(si.fileName, curMi) - if t.last == buffer.len - 1: - stdout.write("\n") - stdout.flushFile() + if prevMi.lineBeg == 0: # no prev. match, so no prev. block to finalize + let pre = getSubLinesBefore(buffer, curMi) + prevMi = curMi + yield Output(kind: blockFirstMatch, pre: pre, match: move(curMi)) else: - let r = replace(curMi.match, pattern, replacement % matches) - doReplace(si, curMi, i, r, result, lineRepl, reallyReplace) - + let nLinesBetween = curMi.lineBeg - prevMi.lineEnd + if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block + let pre = getSubLinesBetween(buffer, prevMi, curMi) + prevMi = curMi + yield Output(kind: blockNextMatch, pre: pre, match: move(curMi)) + else: # finalize previous block and then print next block + let after = getSubLinesAfter(buffer, prevMi) + yield Output(kind: blockEnd, blockEnding: after, + firstLine: prevMi.lineEnd) + let pre = getSubLinesBefore(buffer, curMi) + prevMi = curMi + yield Output(kind: blockFirstMatch, + pre: pre, + match: move(curMi)) i = t.last+1 - prevMi = curMi - if optReplace in options: - result.add(substr(buffer, i)) # finalize new buffer after last match - var f: File - if open(f, filename, fmWrite): - f.write(result) - f.close() - else: - quit "cannot open file for overwriting: " & filename - -proc hasRightFileName(path: string): bool = - let filename = path.lastPathPart - let ex = filename.splitFile.ext.substr(1) # skip leading '.' - if extensions.len != 0: - var matched = false - for x in items(extensions): - if os.cmpPaths(x, ex) == 0: - matched = true - break - if not matched: return false - for x in items(skipExtensions): - if os.cmpPaths(x, ex) == 0: return false - if includeFile.len != 0: - var matched = false - for x in items(includeFile): - if filename.match(x): - matched = true - break - if not matched: return false - for x in items(excludeFile): - if filename.match(x): return false - result = true +func detectBin(buffer: string): bool = + for i in 0 ..< min(1024, buffer.len): + if buffer[i] == '\0': + return true -proc hasRightDirectory(path: string): bool = - let dirname = path.lastPathPart - for x in items(excludeDir): - if dirname.match(x): return false - result = true +proc compilePeg(initPattern: string): Peg = + var pattern = initPattern + if optWord in options: + pattern = r"(^ / !\letter)(" & pattern & r") !\letter" + if optIgnoreStyle in options: + pattern = "\\y " & pattern + elif optIgnoreCase in options: + pattern = "\\i " & pattern + result = peg(pattern) proc styleInsensitive(s: string): string = template addx = @@ -494,28 +868,358 @@ proc styleInsensitive(s: string): string = addx() else: addx() -proc walker(pattern; dir: string; counter: var int, errors: var int) = - if dirExists(dir): - for kind, path in walkDir(dir): +proc compileRegex(initPattern: string): Regex = + var pattern = initPattern + var reflags = {reStudy} + if optIgnoreStyle in options: + pattern = styleInsensitive(pattern) + if optWord in options: + # see https://github.com/nim-lang/Nim/issues/13528#issuecomment-592786443 + pattern = r"(^|\W)(:?" & pattern & r")($|\W)" + if {optIgnoreCase, optIgnoreStyle} * options != {}: + reflags.incl reIgnoreCase + result = if optRex in options: rex(pattern, reflags) + else: re(pattern, reflags) + +template declareCompiledPatterns(compiledStruct: untyped, + StructType: untyped, + body: untyped) = + {.hint[XDeclaredButNotUsed]: off.} + if optRegex notin options: + var compiledStruct: StructType[Peg] + template compile1Pattern(p: string, pat: Peg) = + if p!="": pat = p.compilePeg() + proc compileArray(initPattern: seq[string]): seq[Peg] = + for pat in initPattern: + result.add pat.compilePeg() + body + else: + var compiledStruct: StructType[Regex] + template compile1Pattern(p: string, pat: Regex) = + if p!="": pat = p.compileRegex() + proc compileArray(initPattern: seq[string]): seq[Regex] = + for pat in initPattern: + result.add pat.compileRegex() + body + {.hint[XDeclaredButNotUsed]: on.} + +iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, + yieldContents=false): Output = + var buffer: string + + var error = false + if optFilenames in options: + buffer = filename + elif optPipe in options: + buffer = stdin.readAll() + else: + try: + buffer = system.readFile(filename) + except IOError as e: + yield Output(kind: openError, msg: "readFile failed") + error = true + + if not error: + var reject = false + var reason: string + if searchOpt.checkBin in {biOff, biOnly}: + let isBin = detectBin(buffer) + if isBin and searchOpt.checkBin == biOff: + reject = true + reason = "binary file" + if (not isBin) and searchOpt.checkBin == biOnly: + reject = true + reason = "text file" + + if not reject: + if searchOpt.checkMatch != "": + reject = not contains(buffer, searchOptC.checkMatch, 0) + reason = "doesn't contain a requested match" + + if not reject: + if searchOpt.checkNoMatch != "": + reject = contains(buffer, searchOptC.checkNoMatch, 0) + reason = "contains a forbidden match" + + if reject: + yield Output(kind: rejected, reason: move(reason)) + elif optFilenames in options and searchOpt.pattern == "": + yield Output(kind: outputFileName, name: move(buffer)) + else: + var found = false + var cnt = 0 + for output in searchFile(searchOptC.pattern, buffer): + found = true + if optCount notin options: + yield output + else: + if output.kind in {blockFirstMatch, blockNextMatch}: + inc(cnt) + if optCount in options and cnt > 0: + yield Output(kind: justCount, matches: cnt) + if yieldContents and found and optCount notin options: + yield Output(kind: fileContents, buffer: move(buffer)) + + +proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = + let filename = path.lastPathPart + let ex = filename.splitFile.ext.substr(1) # skip leading '.' + if walkOpt.extensions.len != 0: + var matched = false + for x in walkOpt.extensions: + if os.cmpPaths(x, ex) == 0: + matched = true + break + if not matched: return false + for x in walkOpt.skipExtensions: + if os.cmpPaths(x, ex) == 0: return false + if walkOptC.includeFile.len != 0: + var matched = false + for pat in walkOptC.includeFile: + if filename.contains(pat): + matched = true + break + if not matched: return false + for pat in walkOptC.excludeFile: + if filename.contains(pat): return false + let dirname = path.parentDir + if walkOptC.includeDir.len != 0: + var matched = false + for pat in walkOptC.includeDir: + if dirname.contains(pat): + matched = true + break + if not matched: return false + result = true + +proc hasRightDirectory(path: string, walkOptC: WalkOptComp[Pattern]): bool = + let dirname = path.lastPathPart + for pat in walkOptC.excludeDir: + if dirname.contains(pat): return false + result = true + +iterator walkDirBasic(dir: string, walkOptC: WalkOptComp[Pattern]): string = + var dirStack = @[dir] # stack of directories + var timeFiles = newSeq[(times.Time, string)]() + while dirStack.len > 0: + let d = dirStack.pop() + var files = newSeq[string]() + var dirs = newSeq[string]() + for kind, path in walkDir(d): case kind of pcFile: - if path.hasRightFileName: - processFile(pattern, path, counter, errors) + if path.hasRightFileName(walkOptC): + files.add(path) of pcLinkToFile: - if optFollow in options and path.hasRightFileName: - processFile(pattern, path, counter, errors) + if optFollow in options and path.hasRightFileName(walkOptC): + files.add(path) of pcDir: - if optRecursive in options and path.hasRightDirectory: - walker(pattern, path, counter, errors) + if optRecursive in options and path.hasRightDirectory(walkOptC): + dirs.add path of pcLinkToDir: if optFollow in options and optRecursive in options and - path.hasRightDirectory: - walker(pattern, path, counter, errors) - elif fileExists(dir): - processFile(pattern, dir, counter, errors) + path.hasRightDirectory(walkOptC): + dirs.add path + if sortTime: # sort by time - collect files before yielding + for file in files: + var time: Time + try: + time = getLastModificationTime(file) # can fail for broken symlink + except: + discard + timeFiles.add((time, file)) + else: # alphanumeric sort, yield immediately after sorting + files.sort() + for file in files: + yield file + dirs.sort(order = SortOrder.Descending) + for dir in dirs: + dirStack.add(dir) + if sortTime: + timeFiles.sort(sortTimeOrder) + for (_, file) in timeFiles: + yield file + +iterator walkRec(paths: seq[string]): (string, string) = + declareCompiledPatterns(walkOptC, WalkOptComp): + walkOptC.excludeFile.add walkOpt.excludeFile.compileArray() + walkOptC.includeFile.add walkOpt.includeFile.compileArray() + walkOptC.includeDir.add walkOpt.includeDir.compileArray() + walkOptC.excludeDir.add walkOpt.excludeDir.compileArray() + for path in paths: + if dirExists(path): + for p in walkDirBasic(path, walkOptC): + yield ("", p) + else: + yield ( + if fileExists(path): ("", path) + else: ("Error: no such file or directory: ", path)) + +proc replaceMatches(pattern: Pattern; filename: string, buffer: string, + fileResult: FileResult) = + var newBuf = newStringOfCap(buffer.len) + + var changed = false + var lineRepl = 1 + var i = 0 + for output in fileResult: + if output.kind in {blockFirstMatch, blockNextMatch}: + let curMi = output.match + let r = replacef(curMi.match, pattern, replacement) + if replace1match(filename, buffer, curMi, i, r, newBuf, lineRepl): + changed = true + i = curMi.last + 1 + if changed and optPipe notin options: + newBuf.add(substr(buffer, i)) # finalize new buffer after last match + var f: File + if open(f, filename, fmWrite): + f.write(newBuf) + f.close() + else: + printError "cannot open file for overwriting: " & filename + inc(gVar.errors) + elif optPipe in options: # always print new buffer to stdout in pipe mode + newBuf.add(substr(buffer, i)) # finalize new buffer after last match + stdout.write(newBuf) + +template processFileResult(pattern: Pattern; filename: string, + fileResult: untyped) = + var filenameShown = false + template showFilename = + if not filenameShown: + printBlockFile(filename) + stdout.write("\n") + stdout.flushFile() + filenameShown = true + if optVerbose in options: + showFilename + if optReplace notin options: + var curCol: Column + var toFlush: bool + for output in fileResult: + updateCounters(output) + toFlush = true + if output.kind notin {rejected, openError, justCount} and not oneline: + showFilename + if output.kind == justCount and oneline: + printFile(filename & ":") + printOutput(filename, output, curCol) + if nWorkers == 0 and output.kind in {blockFirstMatch, blockNextMatch}: + stdout.flushFile() # flush immediately in single thread mode + if toFlush: stdout.flushFile() else: - printError "Error: no such file or directory: " & dir - inc(errors) + var buffer = "" + var matches: FileResult + for output in fileResult: + updateCounters(output) + case output.kind + of rejected, openError, justCount, outputFileName: + printInfo(filename, output) + of blockFirstMatch, blockNextMatch, blockEnd: + matches.add(output) + of fileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(pattern, filename, buffer, matches) + +proc run1Thread() = + declareCompiledPatterns(searchOptC, SearchOptComp): + compile1Pattern(searchOpt.pattern, searchOptC.pattern) + compile1Pattern(searchOpt.checkMatch, searchOptC.checkMatch) + compile1Pattern(searchOpt.checkNoMatch, searchOptC.checkNoMatch) + if optPipe in options: + processFileResult(searchOptC.pattern, "-", + processFile(searchOptC, "-", + yieldContents=optReplace in options)) + for (err, filename) in walkRec(paths): + if err != "": + inc(gVar.errors) + printError (err & filename) + continue + processFileResult(searchOptC.pattern, filename, + processFile(searchOptC, filename, + yieldContents=optReplace in options)) + +# Multi-threaded version: all printing is being done in the Main thread. +# Totally nWorkers+1 additional threads are created (workers + pathProducer). +# An example of case nWorkers=2: +# +# ------------------ initial paths ------------------- +# | Main thread |----------------->| pathProducer | +# ------------------ ------------------- +# ^ | | +# resultsChan | walking errors, | | searchRequestsChan +# | number of files | -----+----- +# ----+--------------------------- | | +# | | (when walking finished) |a path |a path to file +# | | | | +# | | V V +# | | ------------ ------------ +# | | | worker 1 | | worker 2 | +# | | ------------ ------------ +# | | matches in the file | | +# | -------------------------------- | +# | matches in the file | +# ---------------------------------------------- +# +# The matches from each file are passed at once as FileResult type. + +proc worker(initSearchOpt: SearchOpt) {.thread.} = + searchOpt = initSearchOpt # init thread-local var + declareCompiledPatterns(searchOptC, SearchOptComp): + compile1Pattern(searchOpt.pattern, searchOptC.pattern) + compile1Pattern(searchOpt.checkMatch, searchOptC.checkMatch) + compile1Pattern(searchOpt.checkNoMatch, searchOptC.checkNoMatch) + while true: + let (fileNo, filename) = searchRequestsChan.recv() + var fileResult: FileResult + for output in processFile(searchOptC, filename, + yieldContents=(optReplace in options)): + fileResult.add(output) + resultsChan.send((false, fileNo, filename, move(fileResult))) + +proc pathProducer(arg: (seq[string], WalkOpt)) {.thread.} = + let paths = arg[0] + walkOpt = arg[1] # init thread-local copy of opt + var + nextFileN = 0 + for (err, filename) in walkRec(paths): + if err == "": + searchRequestsChan.send((nextFileN,filename)) + else: + resultsChan.send((false, nextFileN, + filename, @[Output(kind: openError, msg: err)])) + nextFileN += 1 + resultsChan.send((true, nextFileN, "", @[])) # pass total number of files + +proc runMultiThread() = + var + workers = newSeq[Thread[SearchOpt]](nWorkers) + storage = newTable[int, (string, FileResult) ]() + # file number -> tuple[filename, fileResult - accumulated data structure] + firstUnprocessedFile = 0 # for always processing files in the same order + open(searchRequestsChan) + open(resultsChan) + for n in 0 ..< nWorkers: + createThread(workers[n], worker, searchOpt) + var producerThread: Thread[(seq[string], WalkOpt)] + createThread(producerThread, pathProducer, (paths, walkOpt)) + declareCompiledPatterns(pat, SinglePattern): + compile1Pattern(searchOpt.pattern, pat.pattern) + template add1fileResult(fileNo: int, fname: string, fResult: FileResult) = + storage[fileNo] = (fname, fResult) + while storage.haskey(firstUnprocessedFile): + let fileResult = storage[firstUnprocessedFile][1] + let filename = storage[firstUnprocessedFile][0] + processFileResult(pat.pattern, filename, fileResult) + storage.del(firstUnprocessedFile) + firstUnprocessedFile += 1 + var totalFiles = -1 # will be known when pathProducer finishes + while totalFiles == -1 or firstUnprocessedFile < totalFiles: + let msg = resultsChan.recv() + if msg.finished: + totalFiles = msg.fileNo + else: + add1fileResult(msg.fileNo, msg.filename, msg.fileResult) proc reportError(msg: string) = printError "Error: " & msg @@ -535,6 +1239,15 @@ proc checkOptions(subset: TOptions, a, b: string) = if subset <= options: quit("cannot specify both '$#' and '$#'" % [a, b]) +proc parseNonNegative(str: string, key: string): int = + try: + result = parseInt(str) + except ValueError: + reportError("Option " & key & " requires an integer but '" & + str & "' was given") + if result < 0: + reportError("A positive integer is expected for option " & key) + when defined(posix): useWriteStyled = terminal.isatty(stdout) # that should be before option processing to allow override of useWriteStyled @@ -543,13 +1256,15 @@ for kind, key, val in getopt(): case kind of cmdArgument: if options.contains(optStdin): - filenames.add(key) - elif pattern.len == 0: - pattern = key - elif options.contains(optReplace) and replacement.len == 0: + paths.add(key) + elif not searchOpt.patternSet: + searchOpt.pattern = key + searchOpt.patternSet = true + elif options.contains(optReplace) and not replacementSet: replacement = key + replacementSet = true else: - filenames.add(key) + paths.add(key) of cmdLongOption, cmdShortOption: case normalize(key) of "find", "f": incl(options, optFind) @@ -569,98 +1284,133 @@ for kind, key, val in getopt(): of "confirm": incl(options, optConfirm) of "stdin": incl(options, optStdin) of "word", "w": incl(options, optWord) - of "ignorecase", "i": incl(options, optIgnoreCase) - of "ignorestyle", "y": incl(options, optIgnoreStyle) - of "ext": extensions.add val.split('|') - of "noext": skipExtensions.add val.split('|') - of "excludedir", "exclude-dir": excludeDir.add rex(val) - of "includefile", "include-file": includeFile.add rex(val) - of "excludefile", "exclude-file": excludeFile.add rex(val) - of "nocolor": useWriteStyled = false + of "ignorecase", "ignore-case", "i": incl(options, optIgnoreCase) + of "ignorestyle", "ignore-style", "y": incl(options, optIgnoreStyle) + of "threads", "j": + if val == "": + nWorkers = countProcessors() + else: + nWorkers = parseNonNegative(val, key) + of "ext": walkOpt.extensions.add val.split('|') + of "noext", "no-ext": walkOpt.skipExtensions.add val.split('|') + of "excludedir", "exclude-dir", "ed": walkOpt.excludeDir.add val + of "includedir", "include-dir", "id": walkOpt.includeDir.add val + of "includefile", "include-file", "if": walkOpt.includeFile.add val + of "excludefile", "exclude-file", "ef": walkOpt.excludeFile.add val + of "match": searchOpt.checkMatch = val + of "nomatch": + searchOpt.checkNoMatch = val + of "bin": + case val + of "on": searchOpt.checkBin = biOn + of "off": searchOpt.checkBin = biOff + of "only": searchOpt.checkBin = biOnly + else: reportError("unknown value for --bin") + of "text", "t": searchOpt.checkBin = biOff + of "count": incl(options, optCount) + of "sorttime", "sort-time", "s": + case normalize(val) + of "off": sortTime = false + of "", "on", "asc", "ascending": + sortTime = true + sortTimeOrder = SortOrder.Ascending + of "desc", "descending": + sortTime = true + sortTimeOrder = SortOrder.Descending + else: reportError("invalid value '" & val & "' for --sortTime") + of "nocolor", "no-color": useWriteStyled = false of "color": case val of "auto": discard - of "never", "false": useWriteStyled = false - of "", "always", "true": useWriteStyled = true + of "off", "never", "false": useWriteStyled = false + of "", "on", "always", "true": useWriteStyled = true else: reportError("invalid value '" & val & "' for --color") - of "colortheme": + of "colortheme", "color-theme": colortheme = normalize(val) if colortheme notin ["simple", "bnw", "ack", "gnu"]: reportError("unknown colortheme '" & val & "'") of "beforecontext", "before-context", "b": - try: - linesBefore = parseInt(val) - except ValueError: - reportError("option " & key & " requires an integer but '" & - val & "' was given") + linesBefore = parseNonNegative(val, key) of "aftercontext", "after-context", "a": - try: - linesAfter = parseInt(val) - except ValueError: - reportError("option " & key & " requires an integer but '" & - val & "' was given") + linesAfter = parseNonNegative(val, key) of "context", "c": - try: - linesContext = parseInt(val) - except ValueError: - reportError("option --context requires an integer but '" & - val & "' was given") - of "newline", "l": newLine = true - of "oneline": oneline = true - of "group", "g": oneline = false + linesContext = parseNonNegative(val, key) + of "newline", "l": + newLine = true + # Tabs are aligned automatically for --group, --newLine, --filenames + expandTabs = false + of "group", "g": + oneline = false + expandTabs = false + of "cols", "%": + incl(options, optLimitChars) + termWidth = terminalWidth() + if val == "auto" or key == "%": + limitCharUsr = termWidth + when defined(windows): # Windows cmd & powershell add an empty line + limitCharUsr -= 1 # when printing '\n' right after the last column + elif val == "": + limitCharUsr = 80 + else: + limitCharUsr = parseNonNegative(val, key) + of "onlyascii", "only-ascii", "@": + if val == "" or val == "on" or key == "@": + optOnlyAscii = true + elif val == "off": + optOnlyAscii = false + else: + printError("unknown value for --onlyAscii option") of "verbose": incl(options, optVerbose) - of "filenames": incl(options, optFilenames) + of "filenames": + incl(options, optFilenames) + expandTabs = false of "help", "h": writeHelp() of "version", "v": writeVersion() + of "": incl(options, optPipe) else: reportError("unrecognized option '" & key & "'") of cmdEnd: assert(false) # cannot happen checkOptions({optFind, optReplace}, "find", "replace") +checkOptions({optCount, optReplace}, "count", "replace") checkOptions({optPeg, optRegex}, "peg", "re") checkOptions({optIgnoreCase, optIgnoreStyle}, "ignore_case", "ignore_style") checkOptions({optFilenames, optReplace}, "filenames", "replace") +checkOptions({optPipe, optStdin}, "-", "stdin") +checkOptions({optPipe, optFilenames}, "-", "filenames") +checkOptions({optPipe, optConfirm}, "-", "confirm") +checkOptions({optPipe, optRecursive}, "-", "recursive") linesBefore = max(linesBefore, linesContext) linesAfter = max(linesAfter, linesContext) +if optPipe in options and paths.len != 0: + reportError("both - and paths are specified") + if optStdin in options: - pattern = ask("pattern [ENTER to exit]: ") - if pattern.len == 0: quit(0) + searchOpt.pattern = ask("pattern [ENTER to exit]: ") + if searchOpt.pattern.len == 0: quit(0) if optReplace in options: replacement = ask("replacement [supports $1, $# notations]: ") -if pattern.len == 0: +if optReplace in options and not replacementSet: + reportError("provide REPLACEMENT as second argument (use \"\" for empty one)") +if optReplace in options and paths.len == 0 and optPipe notin options: + reportError("provide paths for replacement explicitly (use . for current directory)") + +if searchOpt.pattern == "" and optFilenames notin options: reportError("empty pattern was given") else: - var counter = 0 - var errors = 0 - if filenames.len == 0: - filenames.add(os.getCurrentDir()) - if optRegex notin options: - if optWord in options: - pattern = r"(^ / !\letter)(" & pattern & r") !\letter" - if optIgnoreStyle in options: - pattern = "\\y " & pattern - elif optIgnoreCase in options: - pattern = "\\i " & pattern - let pegp = peg(pattern) - for f in items(filenames): - walker(pegp, f, counter, errors) + if paths.len == 0 and optPipe notin options: + paths.add(".") + if optPipe in options or nWorkers == 0: + run1Thread() else: - var reflags = {reStudy} - if optIgnoreStyle in options: - pattern = styleInsensitive(pattern) - if optWord in options: - # see https://github.com/nim-lang/Nim/issues/13528#issuecomment-592786443 - pattern = r"(^|\W)(:?" & pattern & r")($|\W)" - if {optIgnoreCase, optIgnoreStyle} * options != {}: - reflags.incl reIgnoreCase - let rep = if optRex in options: rex(pattern, reflags) - else: re(pattern, reflags) - for f in items(filenames): - walker(rep, f, counter, errors) - if errors != 0: - printError $errors & " errors" - stdout.write($counter & " matches\n") - if errors != 0: + runMultiThread() + if gVar.errors != 0: + printError $gVar.errors & " errors" + if searchOpt.pattern != "": + # PATTERN allowed to be empty if --filenames is given + printBold($gVar.matches & " matches") + stdout.write("\n") + if gVar.errors != 0: quit(1) diff --git a/tools/nimgrep.nim.cfg b/tools/nimgrep.nim.cfg index 6d0ea5aadb8ba..64d3edc7ae791 100644 --- a/tools/nimgrep.nim.cfg +++ b/tools/nimgrep.nim.cfg @@ -1,5 +1,4 @@ -# The GC is stable enough now: - -#--gc:none - - +# don't use --gc:refc because of bug +# https://github.com/nim-lang/Nim/issues/14138 . +# --gc:orc and --gc:markandsweep work well. +--threads:on --gc:orc