diff --git a/pkg/transformers/aaa_record_transformer.go b/pkg/transformers/aaa_record_transformer.go
index 1f9bae7dd6..1be4fc9176 100644
--- a/pkg/transformers/aaa_record_transformer.go
+++ b/pkg/transformers/aaa_record_transformer.go
@@ -27,6 +27,13 @@ type RecordTransformerFunc func(
 	outputDownstreamDoneChannel chan<- bool,
 )
 
+// RecordTransformerHelperFunc is used within some verbs: it takes a record and the
+// output list, without the downstream-done channels.
+type RecordTransformerHelperFunc func(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+)
+
 type TransformerUsageFunc func(
 	ostream *os.File,
 )
diff --git a/pkg/transformers/reorder.go b/pkg/transformers/reorder.go
index 216dd714da..141b55c19a 100644
--- a/pkg/transformers/reorder.go
+++ b/pkg/transformers/reorder.go
@@ -4,6 +4,7 @@ import (
 	"container/list"
 	"fmt"
 	"os"
+	"regexp"
 	"strings"
 
 	"github.com/johnkerl/miller/pkg/cli"
@@ -61,9 +62,9 @@ func transformerReorderParseCLI(
 	argi++
 
 	var fieldNames []string = nil
-	putAtEnd := false
-	beforeFieldName := ""
-	afterFieldName := ""
+	doRegexes := false
+	putAfter := false
+	centerFieldName := ""
 
 	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
 		opt := args[argi]
@@ -81,21 +82,23 @@ func transformerReorderParseCLI(
 
 		} else if opt == "-f" {
 			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+			doRegexes = false
+
+		} else if opt == "-r" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+			doRegexes = true
 
 		} else if opt == "-b" {
-			beforeFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
-			afterFieldName = ""
-			putAtEnd = false
+			centerFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
+			putAfter = false
 
 		} else if opt == "-a" {
-			afterFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
-			beforeFieldName = ""
-			putAtEnd = false
+			centerFieldName = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
+			putAfter = true
 
 		} else if opt == "-e" {
-			putAtEnd = true
-			beforeFieldName = ""
-			afterFieldName = ""
+			putAfter = true
+			centerFieldName = ""
 
 		} else {
 			transformerReorderUsage(os.Stderr)
@@ -115,9 +118,9 @@ func transformerReorderParseCLI(
 
 	transformer, err := NewTransformerReorder(
 		fieldNames,
-		putAtEnd,
-		beforeFieldName,
-		afterFieldName,
+		doRegexes,
+		putAfter,
+		centerFieldName,
 	)
 	if err != nil {
 		fmt.Fprintln(os.Stderr, err)
@@ -132,43 +135,76 @@ type TransformerReorder struct {
 	// input
 	fieldNames      []string
 	fieldNamesSet   map[string]bool
-	beforeFieldName string
-	afterFieldName  string
+	regexes         []*regexp.Regexp
+	centerFieldName string
+	putAfter        bool
 
 	// state
-	recordTransformerFunc RecordTransformerFunc
+	recordTransformerFunc RecordTransformerHelperFunc
 }
 
+// NewTransformerReorder picks the per-record helper from doRegexes, putAfter, and
+// centerFieldName. With doRegexes, each entry of fieldNames is compiled as a Miller
+// regex; a compile failure is reported on stderr and exits the process.
 func NewTransformerReorder(
 	fieldNames []string,
-	putAtEnd bool,
-	beforeFieldName string,
-	afterFieldName string,
+	doRegexes bool,
+	putAfter bool,
+	centerFieldName string,
 ) (*TransformerReorder, error) {
 
 	tr := &TransformerReorder{
 		fieldNames:      fieldNames,
 		fieldNamesSet:   lib.StringListToSet(fieldNames),
-		beforeFieldName: beforeFieldName,
-		afterFieldName:  afterFieldName,
+		centerFieldName: centerFieldName,
+		putAfter:        putAfter,
 	}
 
-	if putAtEnd {
-		tr.recordTransformerFunc = tr.reorderToEnd
-	} else if beforeFieldName != "" {
-		tr.recordTransformerFunc = tr.reorderBefore
-	} else if afterFieldName != "" {
-		tr.recordTransformerFunc = tr.reorderAfter
+	if centerFieldName == "" {
+		if putAfter {
+			if doRegexes {
+				tr.recordTransformerFunc = tr.reorderToEndWithRegex
+			} else {
+				tr.recordTransformerFunc = tr.reorderToEndNoRegex
+			}
+		} else {
+			if doRegexes {
+				tr.recordTransformerFunc = tr.reorderToStartWithRegex
+			} else {
+				tr.recordTransformerFunc = tr.reorderToStartNoRegex
+				lib.ReverseStringList(tr.fieldNames)
+			}
+		}
 	} else {
-		tr.recordTransformerFunc = tr.reorderToStart
-		lib.ReverseStringList(tr.fieldNames)
+		if doRegexes {
+			tr.recordTransformerFunc = tr.reorderBeforeOrAfterWithRegex
+		} else {
+			tr.recordTransformerFunc = tr.reorderBeforeOrAfterNoRegex
+		}
+	}
+
+	if doRegexes {
+		tr.regexes = make([]*regexp.Regexp, len(fieldNames))
+		for i, regexString := range fieldNames {
+			// Handles "a.*b"i Miller case-insensitive-regex specification
+			regex, err := lib.CompileMillerRegex(regexString)
+			if err != nil {
+				fmt.Fprintf(
+					os.Stderr,
+					"%s %s: cannot compile regex [%s]\n",
+					"mlr", verbNameReorder, regexString,
+				)
+				os.Exit(1)
+			}
+			tr.regexes[i] = regex
+		}
 	}
 
 	return tr, nil
 }
 
-// ----------------------------------------------------------------
-
+// Transform hands each record to the helper chosen at construction time, and passes
+// the end-of-stream marker through unchanged.
 func (tr *TransformerReorder) Transform(
 	inrecAndContext *types.RecordAndContext,
 	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
@@ -176,156 +212,212 @@ func (tr *TransformerReorder) Transform(
 	outputDownstreamDoneChannel chan<- bool,
 ) {
 	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
-	tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+	if !inrecAndContext.EndOfStream {
+		tr.recordTransformerFunc(
+			inrecAndContext,
+			outputRecordsAndContexts,
+		)
+	} else {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
+	}
 }
 
-// ----------------------------------------------------------------
-func (tr *TransformerReorder) reorderToStart(
+// reorderToStartNoRegex moves the fields named in tr.fieldNames to the head of the
+// record, in place.
+func (tr *TransformerReorder) reorderToStartNoRegex(
 	inrecAndContext *types.RecordAndContext,
 	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
-	inputDownstreamDoneChannel <-chan bool,
-	outputDownstreamDoneChannel chan<- bool,
 ) {
-	if !inrecAndContext.EndOfStream {
-		inrec := inrecAndContext.Record
-		for _, fieldName := range tr.fieldNames {
-			inrec.MoveToHead(fieldName)
+	inrec := inrecAndContext.Record
+	for _, fieldName := range tr.fieldNames {
+		inrec.MoveToHead(fieldName)
+	}
+	outputRecordsAndContexts.PushBack(inrecAndContext)
+}
+
+// reorderToStartWithRegex emits a new record with the regex-matching fields first,
+// followed by the non-matching ones; each group keeps its input order.
+func (tr *TransformerReorder) reorderToStartWithRegex(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+) {
+	inrec := inrecAndContext.Record
+
+	outrec := mlrval.NewMlrmapAsRecord()
+	atEnds := list.New()
+	for pe := inrec.Head; pe != nil; pe = pe.Next {
+		found := false
+		for _, regex := range tr.regexes {
+			if regex.MatchString(pe.Key) {
+				outrec.PutReference(pe.Key, pe.Value)
+				found = true
+				break
+			}
 		}
-		outputRecordsAndContexts.PushBack(inrecAndContext)
+		if !found {
+			atEnds.PushBack(pe)
+		}
+	}
 
-	} else {
-		outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
+	for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() {
+		// Ownership transfer; no copy needed
+		pe := atEnd.Value.(*mlrval.MlrmapEntry)
+		outrec.PutReference(pe.Key, pe.Value)
 	}
+
+	outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context)
+	outputRecordsAndContexts.PushBack(outrecAndContext)
 }
 
-// ----------------------------------------------------------------
-func (tr *TransformerReorder) reorderToEnd(
+// reorderToEndNoRegex moves the fields named in tr.fieldNames to the tail of the
+// record, in place.
+func (tr *TransformerReorder) reorderToEndNoRegex(
 	inrecAndContext *types.RecordAndContext,
 	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
-	inputDownstreamDoneChannel <-chan bool,
-	outputDownstreamDoneChannel chan<- bool,
 ) {
-	if !inrecAndContext.EndOfStream {
-		inrec := inrecAndContext.Record
-		for _, fieldName := range tr.fieldNames {
-			inrec.MoveToTail(fieldName)
-		}
-		outputRecordsAndContexts.PushBack(inrecAndContext)
-	} else {
-		outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
+	inrec := inrecAndContext.Record
+	for _, fieldName := range tr.fieldNames {
+		inrec.MoveToTail(fieldName)
 	}
+	outputRecordsAndContexts.PushBack(inrecAndContext)
+
 }
 
-// ----------------------------------------------------------------
-func (tr *TransformerReorder) reorderBefore(
+// reorderToEndWithRegex emits a new record with the non-matching fields first,
+// followed by the regex-matching ones; each group keeps its input order.
+func (tr *TransformerReorder) reorderToEndWithRegex(
 	inrecAndContext *types.RecordAndContext,
 	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
-	inputDownstreamDoneChannel <-chan bool,
-	outputDownstreamDoneChannel chan<- bool,
 ) {
-	if !inrecAndContext.EndOfStream {
-		inrec := inrecAndContext.Record
-		if inrec.Get(tr.beforeFieldName) == nil {
-			outputRecordsAndContexts.PushBack(inrecAndContext)
-			return
+	inrec := inrecAndContext.Record
+	outrec := mlrval.NewMlrmapAsRecord()
+	atEnds := list.New()
+	for pe := inrec.Head; pe != nil; pe = pe.Next {
+		found := false
+		for _, regex := range tr.regexes {
+			if regex.MatchString(pe.Key) {
+				atEnds.PushBack(pe)
+				found = true
+				break
+			}
 		}
+		if !found {
+			outrec.PutReference(pe.Key, pe.Value)
+		}
+	}
 
-		outrec := mlrval.NewMlrmapAsRecord()
-		pe := inrec.Head
+	for atEnd := atEnds.Front(); atEnd != nil; atEnd = atEnd.Next() {
+		// Ownership transfer; no copy needed
+		pe := atEnd.Value.(*mlrval.MlrmapEntry)
+		outrec.PutReference(pe.Key, pe.Value)
+	}
 
-		// * inrec will be GC'ed
-		// * We will use outrec.PutReference not output.PutCopy since inrec will be GC'ed
+	outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context)
+	outputRecordsAndContexts.PushBack(outrecAndContext)
+}
 
-		for ; pe != nil; pe = pe.Next {
-			if pe.Key == tr.beforeFieldName {
-				break
-			}
-			if !tr.fieldNamesSet[pe.Key] {
-				outrec.PutReference(pe.Key, pe.Value)
-			}
+// reorderBeforeOrAfterNoRegex emits a new record with the fields named in
+// tr.fieldNames placed just before tr.centerFieldName, or just after it when
+// tr.putAfter is set. A record for which Get(tr.centerFieldName) is nil passes through.
+func (tr *TransformerReorder) reorderBeforeOrAfterNoRegex(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+) {
+	inrec := inrecAndContext.Record
+	if inrec.Get(tr.centerFieldName) == nil {
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+		return
+	}
+
+	outrec := mlrval.NewMlrmapAsRecord()
+	pe := inrec.Head
+
+	// We use outrec.PutReference not output.PutCopy since inrec will be GC'ed
+
+	for ; pe != nil; pe = pe.Next {
+		if pe.Key == tr.centerFieldName {
+			break
+		}
+		if !tr.fieldNamesSet[pe.Key] {
+			outrec.PutReference(pe.Key, pe.Value)
 		}
+	}
 
+	if !tr.putAfter {
 		for _, fieldName := range tr.fieldNames {
 			value := inrec.Get(fieldName)
 			if value != nil {
 				outrec.PutReference(fieldName, value)
 			}
 		}
+	}
 
-		value := inrec.Get(tr.beforeFieldName)
-		if value != nil {
-			outrec.PutReference(tr.beforeFieldName, value)
-		}
+	value := inrec.Get(tr.centerFieldName)
+	if value != nil {
+		outrec.PutReference(tr.centerFieldName, value)
+	}
 
-		for ; pe != nil; pe = pe.Next {
-			if pe.Key != tr.beforeFieldName && !tr.fieldNamesSet[pe.Key] {
-				outrec.PutReference(pe.Key, pe.Value)
+	if tr.putAfter {
+		for _, fieldName := range tr.fieldNames {
+			value := inrec.Get(fieldName)
+			if value != nil {
+				outrec.PutReference(fieldName, value)
 			}
 		}
+	}
 
-		for _, fieldName := range tr.fieldNames {
-			inrec.MoveToHead(fieldName)
+	for ; pe != nil; pe = pe.Next {
+		if pe.Key != tr.centerFieldName && !tr.fieldNamesSet[pe.Key] {
+			outrec.PutReference(pe.Key, pe.Value)
 		}
-		outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context))
-
-	} else {
-		outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
 	}
+
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context))
+
 }
 
-// ----------------------------------------------------------------
-func (tr *TransformerReorder) reorderAfter(
+// reorderBeforeOrAfterWithRegex emits a new record with the regex-matching fields
+// (other than the center field) placed just before tr.centerFieldName, or just after
+// it when tr.putAfter is set. A record for which Get(tr.centerFieldName) is nil passes through.
+func (tr *TransformerReorder) reorderBeforeOrAfterWithRegex(
 	inrecAndContext *types.RecordAndContext,
 	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
-	inputDownstreamDoneChannel <-chan bool,
-	outputDownstreamDoneChannel chan<- bool,
 ) {
-	if !inrecAndContext.EndOfStream {
-		inrec := inrecAndContext.Record
-		if inrec.Get(tr.afterFieldName) == nil {
-			outputRecordsAndContexts.PushBack(inrecAndContext)
-			return
-		}
-
-		outrec := mlrval.NewMlrmapAsRecord()
-		pe := inrec.Head
-
-		// * inrec will be GC'ed
-		// * We will use outrec.PutReference not output.PutCopy since inrec will be GC'ed
+	inrec := inrecAndContext.Record
+	if inrec.Get(tr.centerFieldName) == nil {
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+		return
+	}
 
-		for ; pe != nil; pe = pe.Next {
-			if pe.Key == tr.afterFieldName {
-				break
-			}
-			if !tr.fieldNamesSet[pe.Key] {
-				outrec.PutReference(pe.Key, pe.Value)
+	matchingFieldNamesSet := lib.NewOrderedMap()
+	for pe := inrec.Head; pe != nil; pe = pe.Next {
+		for _, regex := range tr.regexes {
+			if regex.MatchString(pe.Key) {
+				if pe.Key != tr.centerFieldName {
+					matchingFieldNamesSet.Put(pe.Key, pe.Value)
+					break
+				}
 			}
 		}
+	}
 
-		value := inrec.Get(tr.afterFieldName)
-		if value != nil {
-			outrec.PutReference(tr.afterFieldName, value)
-		}
-
-		for _, fieldName := range tr.fieldNames {
-			value := inrec.Get(fieldName)
-			if value != nil {
-				outrec.PutReference(fieldName, value)
+	// We use outrec.PutReference not output.PutCopy since inrec will be GC'ed
+	outrec := mlrval.NewMlrmapAsRecord()
+	for pe := inrec.Head; pe != nil; pe = pe.Next {
+		if pe.Key == tr.centerFieldName {
+			if tr.putAfter {
+				outrec.PutReference(pe.Key, pe.Value)
 			}
-		}
-
-		for ; pe != nil; pe = pe.Next {
-			if pe.Key != tr.afterFieldName && !tr.fieldNamesSet[pe.Key] {
+			for pf := matchingFieldNamesSet.Head; pf != nil; pf = pf.Next {
+				outrec.PutReference(pf.Key, pf.Value.(*mlrval.Mlrval))
+			}
+			if !tr.putAfter {
 				outrec.PutReference(pe.Key, pe.Value)
 			}
+		} else if !matchingFieldNamesSet.Has(pe.Key) {
+			outrec.PutReference(pe.Key, pe.Value)
 		}
-
-		for _, fieldName := range tr.fieldNames {
-			inrec.MoveToHead(fieldName)
-		}
-		outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context))
-
-	} else {
-		outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
 	}
+
+	outputRecordsAndContexts.PushBack(types.NewRecordAndContext(outrec, &inrecAndContext.Context))
 }
diff --git a/test/cases/verb-reorder/regex-after/cmd b/test/cases/verb-reorder/regex-after/cmd
new file mode 100644
index 0000000000..59a79f7f6d
--- /dev/null
+++ b/test/cases/verb-reorder/regex-after/cmd
@@ -0,0 +1 @@
+mlr --n2x reorder -r 3,9,8 -a 6 test/input/reorder-regex.nidx
diff --git a/test/cases/verb-reorder/regex-after/experr b/test/cases/verb-reorder/regex-after/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-reorder/regex-after/expout b/test/cases/verb-reorder/regex-after/expout
new file mode 100644
index 0000000000..62cb82ad5c
--- /dev/null
+++ b/test/cases/verb-reorder/regex-after/expout
@@ -0,0 +1,10 @@
+1  a
+2  b
+4  d
+5  e
+6  f
+3  c
+8  h
+9  i
+7  g
+10 j
diff --git a/test/cases/verb-reorder/regex-before/cmd b/test/cases/verb-reorder/regex-before/cmd
new file mode 100644
index 0000000000..f207567a86
--- /dev/null
+++ b/test/cases/verb-reorder/regex-before/cmd
@@ -0,0 +1 @@
+mlr --n2x reorder -r 3,9,8 -b 6 test/input/reorder-regex.nidx
diff --git a/test/cases/verb-reorder/regex-before/experr b/test/cases/verb-reorder/regex-before/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-reorder/regex-before/expout b/test/cases/verb-reorder/regex-before/expout
new file mode 100644
index 0000000000..ef4d4f1666
--- /dev/null
+++ b/test/cases/verb-reorder/regex-before/expout
@@ -0,0 +1,10 @@
+1  a
+2  b
+4  d
+5  e
+3  c
+8  h
+9  i
+6  f
+7  g
+10 j
diff --git a/test/cases/verb-reorder/regex-end/cmd b/test/cases/verb-reorder/regex-end/cmd
new file mode 100644
index 0000000000..8c3e21c81b
--- /dev/null
+++ b/test/cases/verb-reorder/regex-end/cmd
@@ -0,0 +1 @@
+mlr --n2x reorder -r 3,9,8 -e test/input/reorder-regex.nidx
diff --git a/test/cases/verb-reorder/regex-end/experr b/test/cases/verb-reorder/regex-end/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-reorder/regex-end/expout b/test/cases/verb-reorder/regex-end/expout
new file mode 100644
index 0000000000..7a7424aa94
--- /dev/null
+++ b/test/cases/verb-reorder/regex-end/expout
@@ -0,0 +1,10 @@
+1  a
+2  b
+4  d
+5  e
+6  f
+7  g
+10 j
+3  c
+8  h
+9  i
diff --git a/test/cases/verb-reorder/regex-start/cmd b/test/cases/verb-reorder/regex-start/cmd
new file mode 100644
index 0000000000..2020a1393e
--- /dev/null
+++ b/test/cases/verb-reorder/regex-start/cmd
@@ -0,0 +1 @@
+mlr --n2x reorder -r 3,9,8 test/input/reorder-regex.nidx
diff --git a/test/cases/verb-reorder/regex-start/experr b/test/cases/verb-reorder/regex-start/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-reorder/regex-start/expout b/test/cases/verb-reorder/regex-start/expout
new file mode 100644
index 0000000000..ee16332d97
--- /dev/null
+++ b/test/cases/verb-reorder/regex-start/expout
@@ -0,0 +1,10 @@
+3  c
+8  h
+9  i
+1  a
+2  b
+4  d
+5  e
+6  f
+7  g
+10 j
diff --git a/test/input/reorder-regex.nidx b/test/input/reorder-regex.nidx
new file mode 100644
index 0000000000..6a76ef8fa8
--- /dev/null
+++ b/test/input/reorder-regex.nidx
@@ -0,0 +1 @@
+a b c d e f g h i j