Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new upcase and downcase verbs #1217

Merged
merged 4 commits into from
Mar 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions docs/src/manpage.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,14 @@ MILLER(1) MILLER(1)

VERB LIST
altkv bar bootstrap cat check clean-whitespace count-distinct count
count-similar cut decimate fill-down fill-empty filter flatten format-values
fraction gap grep group-by group-like having-fields head histogram json-parse
json-stringify join label latin1-to-utf8 least-frequent merge-fields
most-frequent nest nothing put regularize remove-empty-columns rename reorder
repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
sort sort-within-records split stats1 stats2 step summary tac tail tee
template top utf8-to-latin1 unflatten uniq unspace unsparsify
count-similar cut decimate downcase fill-down fill-empty filter flatten
format-values fraction gap grep group-by group-like having-fields head
histogram json-parse json-stringify join label latin1-to-utf8 least-frequent
merge-fields most-frequent nest nothing put regularize remove-empty-columns
rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
skip-trivial-records sort sort-within-records split stats1 stats2 step summary
tac tail tee template top utf8-to-latin1 unflatten uniq unspace unsparsify
upcase

FUNCTION LIST
abs acos acosh any append apply arrayify asin asinh asserting_absent
Expand Down Expand Up @@ -1002,6 +1003,14 @@ MILLER(1) MILLER(1)
-n {n} Decimation factor (default 10).
-h|--help Show this message.

downcase
Usage: mlr downcase [options]
Lowercases strings in record keys and/or values.
Options:
-k Downcase only keys, not keys and values.
-v Downcase only values, not keys and values.
-h|--help Show this message.

fill-down
Usage: mlr fill-down [options]
If a given record has a missing value for a given field, fill that from
Expand Down Expand Up @@ -2107,6 +2116,14 @@ MILLER(1) MILLER(1)
being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
'a=,b=3,c=4'.

upcase
Usage: mlr upcase [options]
Uppercases strings in record keys and/or values.
Options:
-k Upcase only keys, not keys and values.
-v Upcase only values, not keys and values.
-h|--help Show this message.

FUNCTIONS FOR FILTER/PUT
abs
(class=math #args=1) Absolute value.
Expand Down Expand Up @@ -3314,5 +3331,5 @@ MILLER(1) MILLER(1)



2023-03-02 MILLER(1)
2023-03-04 MILLER(1)
</pre>
33 changes: 25 additions & 8 deletions docs/src/manpage.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,14 @@ MILLER(1) MILLER(1)

VERB LIST
altkv bar bootstrap cat check clean-whitespace count-distinct count
count-similar cut decimate fill-down fill-empty filter flatten format-values
fraction gap grep group-by group-like having-fields head histogram json-parse
json-stringify join label latin1-to-utf8 least-frequent merge-fields
most-frequent nest nothing put regularize remove-empty-columns rename reorder
repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
sort sort-within-records split stats1 stats2 step summary tac tail tee
template top utf8-to-latin1 unflatten uniq unspace unsparsify
count-similar cut decimate downcase fill-down fill-empty filter flatten
format-values fraction gap grep group-by group-like having-fields head
histogram json-parse json-stringify join label latin1-to-utf8 least-frequent
merge-fields most-frequent nest nothing put regularize remove-empty-columns
rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
skip-trivial-records sort sort-within-records split stats1 stats2 step summary
tac tail tee template top utf8-to-latin1 unflatten uniq unspace unsparsify
upcase

FUNCTION LIST
abs acos acosh any append apply arrayify asin asinh asserting_absent
Expand Down Expand Up @@ -981,6 +982,14 @@ MILLER(1) MILLER(1)
-n {n} Decimation factor (default 10).
-h|--help Show this message.

downcase
Usage: mlr downcase [options]
Lowercases strings in record keys and/or values.
Options:
-k Downcase only keys, not keys and values.
-v Downcase only values, not keys and values.
-h|--help Show this message.

fill-down
Usage: mlr fill-down [options]
If a given record has a missing value for a given field, fill that from
Expand Down Expand Up @@ -2086,6 +2095,14 @@ MILLER(1) MILLER(1)
being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
'a=,b=3,c=4'.

upcase
Usage: mlr upcase [options]
Uppercases strings in record keys and/or values.
Options:
-k Upcase only keys, not keys and values.
-v Upcase only values, not keys and values.
-h|--help Show this message.

FUNCTIONS FOR FILTER/PUT
abs
(class=math #args=1) Absolute value.
Expand Down Expand Up @@ -3293,4 +3310,4 @@ MILLER(1) MILLER(1)



2023-03-02 MILLER(1)
2023-03-04 MILLER(1)
27 changes: 27 additions & 0 deletions docs/src/reference-verbs.md
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,20 @@ Options:
-h|--help Show this message.
</pre>

## downcase

<pre class="pre-highlight-in-pair">
<b>mlr downcase --help</b>
</pre>
<pre class="pre-non-highlight-in-pair">
Usage: mlr downcase [options]
Lowercases strings in record keys and/or values.
Options:
-k Downcase only keys, not keys and values.
-v Downcase only values, not keys and values.
-h|--help Show this message.
</pre>

## fill-down

<pre class="pre-highlight-in-pair">
Expand Down Expand Up @@ -4267,3 +4281,16 @@ a b v u w x
- - 1 - 2 -
</pre>

## upcase

<pre class="pre-highlight-in-pair">
<b>mlr upcase --help</b>
</pre>
<pre class="pre-non-highlight-in-pair">
Usage: mlr upcase [options]
Uppercases strings in record keys and/or values.
Options:
-k Upcase only keys, not keys and values.
-v Upcase only values, not keys and values.
-h|--help Show this message.
</pre>
11 changes: 11 additions & 0 deletions docs/src/reference-verbs.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,12 @@ GENMD-RUN-COMMAND
mlr decimate --help
GENMD-EOF

## downcase

GENMD-RUN-COMMAND
mlr downcase --help
GENMD-EOF

## fill-down

GENMD-RUN-COMMAND
Expand Down Expand Up @@ -1289,3 +1295,8 @@ GENMD-RUN-COMMAND
mlr --ijson --opprint unsparsify -f a,b,u,v,w,x then regularize data/sparse.json
GENMD-EOF

## upcase

GENMD-RUN-COMMAND
mlr upcase --help
GENMD-EOF
2 changes: 2 additions & 0 deletions internal/pkg/transformers/aaa_transformer_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
CountSimilarSetup,
CutSetup,
DecimateSetup,
DowncaseSetup,
FillDownSetup,
FillEmptySetup,
FilterSetup,
Expand Down Expand Up @@ -75,6 +76,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
UniqSetup,
UnspaceSetup,
UnsparsifySetup,
UpcaseSetup,
}

func ShowHelpForTransformer(verb string) bool {
Expand Down
178 changes: 178 additions & 0 deletions internal/pkg/transformers/downcase.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
package transformers

import (
"container/list"
"fmt"
"os"
"strings"

"github.com/johnkerl/miller/internal/pkg/cli"
"github.com/johnkerl/miller/internal/pkg/mlrval"
"github.com/johnkerl/miller/internal/pkg/types"
)

// ----------------------------------------------------------------
// verbNameDowncase is the verb's name as printed in its usage message and
// registered in DowncaseSetup.
const verbNameDowncase = "downcase"

// DowncaseSetup bundles the downcase verb's name, its usage printer, and its
// command-line parser into a single TransformerSetup value.
var DowncaseSetup = TransformerSetup{
Verb: verbNameDowncase,
UsageFunc: transformerDowncaseUsage,
ParseCLIFunc: transformerDowncaseParseCLI,
IgnoresInput: false,
}

// transformerDowncaseUsage writes the help text for the downcase verb to o:
// a usage line followed by a one-line description and the option list.
func transformerDowncaseUsage(
	o *os.File,
) {
	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameDowncase)

	// The remaining lines carry no format verbs, so they are emitted verbatim,
	// one write per line.
	for _, line := range []string{
		"Lowercases strings in record keys and/or values.\n",
		"Options:\n",
		"-k Downcase only keys, not keys and values.\n",
		"-v Downcase only values, not keys and values.\n",
		"-h|--help Show this message.\n",
	} {
		fmt.Fprint(o, line)
	}
}

// transformerDowncaseParseCLI consumes the downcase verb and its flags from
// args, starting at index *pargi, and advances *pargi past everything it
// consumed.
//
// Recognized flags are -k (keys only), -v (values only), and -h/--help (print
// usage to stdout and exit 0). Any other dash-prefixed argument prints usage
// to stderr and exits 1. Scanning stops at the first argument that does not
// start with "-", or at "--".
//
// When doConstruct is false the function returns nil after parsing; otherwise
// it returns the transformer built by NewTransformerDowncase.
func transformerDowncaseParseCLI(
	pargi *int,
	argc int,
	args []string,
	_ *cli.TOptions,
	doConstruct bool, // false for first pass of CLI-parse, true for second pass
) IRecordTransformer {

	// Start just past the verb name on the mlr command line.
	argi := *pargi + 1

	// Default mode; -k or -v narrows it. If both are given, the later one wins.
	which := "keys_and_values"

	for argi < argc {
		opt := args[argi]
		// A non-flag argument ends flag processing. So does "--": all
		// transformers must stop there so main-flags can follow verb-flags.
		if !strings.HasPrefix(opt, "-") || opt == "--" {
			break
		}
		argi++

		switch opt {
		case "-h", "--help":
			transformerDowncaseUsage(os.Stdout)
			os.Exit(0)
		case "-k":
			which = "keys_only"
		case "-v":
			which = "values_only"
		default:
			transformerDowncaseUsage(os.Stderr)
			os.Exit(1)
		}
	}

	*pargi = argi

	// All transformers must return early here on the parse-only pass.
	if !doConstruct {
		return nil
	}

	transformer, err := NewTransformerDowncase(which)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	return transformer
}

// ----------------------------------------------------------------
// TransformerDowncase is the record transformer behind the downcase verb.
type TransformerDowncase struct {
// Per-record handler, selected once by NewTransformerDowncase according to
// the requested mode (keys only, values only, or keys and values).
recordTransformerFunc RecordTransformerFunc
}

// NewTransformerDowncase builds a downcase transformer for the given mode.
//
// which selects the per-record handler: "keys_only" lowercases keys,
// "values_only" lowercases values, and any other string lowercases both.
// The returned error is always nil.
func NewTransformerDowncase(
	which string,
) (*TransformerDowncase, error) {
	tr := &TransformerDowncase{}

	switch which {
	case "keys_only":
		tr.recordTransformerFunc = tr.transformKeysOnly
	case "values_only":
		tr.recordTransformerFunc = tr.transformValuesOnly
	default:
		tr.recordTransformerFunc = tr.transformKeysAndValues
	}

	return tr, nil
}

// Transform handles one item from the record stream. The end-of-stream marker
// is appended to the output list unchanged; every other item is handed to the
// handler chosen when the transformer was constructed.
func (tr *TransformerDowncase) Transform(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)

	// End of record stream: forward the marker as-is.
	if inrecAndContext.EndOfStream {
		outputRecordsAndContexts.PushBack(inrecAndContext)
		return
	}

	tr.recordTransformerFunc(
		inrecAndContext,
		outputRecordsAndContexts,
		inputDownstreamDoneChannel,
		outputDownstreamDoneChannel,
	)
}

// transformKeysOnly builds a new record whose keys are the lowercased keys of
// the input record, with values carried over untouched, and appends it to the
// output list along with the input's context.
func (tr *TransformerDowncase) transformKeysOnly(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	_ <-chan bool,
	__ chan<- bool,
) {
	source := inrecAndContext.Record
	lowered := mlrval.NewMlrmapAsRecord()

	for entry := source.Head; entry != nil; entry = entry.Next {
		// The value is handed over by reference rather than copied: the input
		// record is abandoned after this, so ownership moves to the new record.
		lowered.PutReference(strings.ToLower(entry.Key), entry.Value)
	}

	outputRecordsAndContexts.PushBack(
		types.NewRecordAndContext(lowered, &inrecAndContext.Context),
	)
}

// transformValuesOnly lowercases, in place, every value for which
// GetStringValue reports ok, leaving keys and all other values as they are,
// then appends the record to the output list along with the input's context.
func (tr *TransformerDowncase) transformValuesOnly(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	_ <-chan bool,
	__ chan<- bool,
) {
	record := inrecAndContext.Record

	for entry := record.Head; entry != nil; entry = entry.Next {
		if text, isString := entry.Value.GetStringValue(); isString {
			entry.Value = mlrval.FromString(strings.ToLower(text))
		}
	}

	outputRecordsAndContexts.PushBack(
		types.NewRecordAndContext(record, &inrecAndContext.Context),
	)
}

// transformKeysAndValues builds a new record in which every key is lowercased
// and every value for which GetStringValue reports ok is replaced by its
// lowercased form; other values are carried over by reference. The new record
// is appended to the output list along with the input's context.
func (tr *TransformerDowncase) transformKeysAndValues(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	_ <-chan bool,
	__ chan<- bool,
) {
	source := inrecAndContext.Record
	lowered := mlrval.NewMlrmapAsRecord()

	for entry := source.Head; entry != nil; entry = entry.Next {
		key := strings.ToLower(entry.Key)

		// Default to the original value; swap in a lowercased string only when
		// the value reports itself as a string.
		value := entry.Value
		if text, isString := value.GetStringValue(); isString {
			value = mlrval.FromString(strings.ToLower(text))
		}

		lowered.PutReference(key, value)
	}

	outputRecordsAndContexts.PushBack(
		types.NewRecordAndContext(lowered, &inrecAndContext.Context),
	)
}
Loading