diff --git a/docs/src/10min.md b/docs/src/10min.md index 5cadaa2298..33f7252dab 100644 --- a/docs/src/10min.md +++ b/docs/src/10min.md @@ -39,9 +39,6 @@ purple,triangle,false,7,65,80.1405,5.8240 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream But `mlr cat` can also do format conversion -- for example, you can pretty-print in tabular format: @@ -61,9 +58,6 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream `mlr head` and `mlr tail` count records rather than lines. Whether you're getting the first few records or the last few, the CSV header is included either way: @@ -77,9 +71,6 @@ yellow,triangle,true,1,11,43.6498,9.8870 red,square,true,2,15,79.2778,0.0130 red,circle,true,3,16,13.8103,2.9010 red,square,false,4,48,77.5542,7.4670 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -91,9 +82,6 @@ purple,triangle,false,7,65,80.1405,5.8240 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -120,9 +108,6 @@ go tool pprof -http=:8080 foo-stream "rate": 8.2430 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamYou can sort on a single field: @@ -142,9 +127,6 @@ purple square false 10 91 72.3735 8.2430 yellow triangle true 1 11 43.6498 9.8870 purple triangle false 5 51 81.2290 8.5910 purple triangle false 7 65 80.1405 5.8240 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Or, you can sort primarily alphabetically on one field, then secondarily numerically descending on another field, and so on: @@ -164,9 +146,6 @@ red square true 2 15 79.2778 0.0130 purple triangle false 7 65 80.1405 5.8240 purple triangle false 5 51 81.2290 8.5910 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream If there are fields you don't want to see in your data, you can use `cut` to keep only the ones you want, in the same order they appeared in the input data: @@ -186,9 +165,6 @@ triangle false circle true circle true square false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream You can also use `cut -o` to keep specified fields, but in your preferred order: @@ -208,9 +184,6 @@ false triangle true circle true circle false square -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream You can use `cut -x` to omit fields you don't care about: @@ -230,9 +203,6 @@ purple 7 65 80.1405 5.8240 yellow 8 73 63.9785 4.2370 yellow 9 87 63.5058 8.3350 purple 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Even though Miller's main selling point is name-indexing, sometimes you really want to refer to a field name by its positional index. 
Use `$[[3]]` to access the name of field 3 or `$[[[3]]]` to access the value of field 3: @@ -252,9 +222,6 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -272,9 +239,6 @@ purple triangle NEW 7 65 80.1405 5.8240 yellow circle NEW 8 73 63.9785 4.2370 yellow circle NEW 9 87 63.5058 8.3350 purple square NEW 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamYou can find the full list of verbs at the [Verbs Reference](reference-verbs.md) page. @@ -292,9 +256,6 @@ red square true 2 15 79.2778 0.0130 red circle true 3 16 13.8103 2.9010 red square false 4 48 77.5542 7.4670 red square false 6 64 77.1991 9.5310 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -304,9 +265,6 @@ go tool pprof -http=:8080 foo-stream color shape flag k index quantity rate red square true 2 15 79.2778 0.0130 red circle true 3 16 13.8103 2.9010 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Computing new fields @@ -331,9 +289,6 @@ purple triangle false 7 65 80.1405 5.8240 13.760388049450551 purple_triangl yellow circle true 8 73 63.9785 4.2370 15.09995279679018 yellow_circle yellow circle true 9 87 63.5058 8.3350 7.619172165566886 yellow_circle purple square false 10 91 72.3735 8.2430 8.779995147397793 purple_square -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream When you create a new field, it can immediately be used in subsequent statements: @@ -356,9 +311,6 @@ purple triangle false 7 65 80.1405 5.8240 66 4363 yellow circle true 8 73 63.9785 4.2370 74 5484 yellow circle true 9 87 63.5058 8.3350 88 7753 purple square false 10 91 72.3735 8.2430 92 8474 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream For `put` and `filter` we were able to type out expressions using a programming-language syntax. @@ -379,9 +331,6 @@ Zone,Total MWh 17,39.8 24,7.4 30,50.5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -393,9 +342,6 @@ Zone Total MWh 17 39.8 14 27.2 24 7.4 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamFor `put` and `filter` expressions, use `${...}`: @@ -409,9 +355,6 @@ Zone Total MWh Total KWh 17 39.8 39800 24 7.4 7400 30 50.5 50500 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream See also the [section on field names](reference-dsl-variables.md#field-names). @@ -458,9 +401,6 @@ a,b,c 1,2,3 4,5,6 7,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Chaining verbs together @@ -475,12 +415,6 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream This works fine -- but Miller also lets you chain verbs together using the word `then`. Think of this as a Miller-internal pipe that lets you use fewer keystrokes: @@ -493,9 +427,6 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream As another convenience, you can put the filename first using `--from`. When you're interacting with your data at the command line, this makes it easier to up-arrow and append to the previous command: @@ -508,9 +439,6 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -524,9 +452,6 @@ shape quantity square 72.3735 circle 63.5058 circle 63.9785 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Sorts and stats @@ -543,9 +468,6 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Lots of Miller commands take a `-g` option for group-by: here, `head -n 1 -g shape` outputs the first record for each distinct value of the `shape` field. This means we're finding the record with highest `index` field for each distinct `shape` field: @@ -558,9 +480,6 @@ color shape flag k index quantity rate yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 purple triangle false 7 65 80.1405 5.8240 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Statistics can be computed with or without group-by field(s): @@ -574,9 +493,6 @@ shape quantity_count quantity_min quantity_mean quantity_max triangle 3 43.6498 68.33976666666666 81.229 square 4 72.3735 76.60114999999999 79.2778 circle 3 13.8103 47.0982 63.9785 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -591,9 +507,6 @@ circle red 1 13.8103 13.8103 13.8103 triangle purple 2 80.1405 80.68475000000001 81.229 circle yellow 2 63.5058 63.742149999999995 63.9785 square purple 1 72.3735 72.3735 72.3735 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIf your output has a lot of columns, you can use XTAB format to line things up vertically for you instead: @@ -611,9 +524,6 @@ rate_p75 8.5910 rate_p90 9.8870 rate_p99 9.8870 rate_p100 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Unicode and internationalization @@ -646,9 +556,6 @@ UTF-8 data. For example: κόκκινο κύκλος αληθινό 3 16 13.8103 2.9010 κίτρινο κύκλος αληθινό 8 73 63.9785 4.2370 κίτρινο κύκλος αληθινό 9 87 63.5058 8.3350 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -666,9 +573,6 @@ go tool pprof -http=:8080 foo-stream κόκκινο τετράγωνο ψευδές 6 64 77.1991 9.5310 μοβ τρίγωνο ψευδές 7 65 80.1405 5.8240 μοβ τετράγωνο ψευδές 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -686,9 +590,6 @@ go tool pprof -http=:8080 foo-stream желтый КРУГ истина 8 73 63.9785 4.2370 6 желтый КРУГ истина 9 87 63.5058 8.3350 6 фиолетовый КВАДРАТ ложь 10 91 72.3735 8.2430 10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## File formats and format conversion @@ -788,9 +689,6 @@ a matter of specifying input-format and output-format flags: "rate": 0.0130 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -800,9 +698,6 @@ go tool pprof -http=:8080 foo-stream color,shape,flag,k,index,quantity,rate yellow,triangle,true,1,11,43.6498,9.8870 red,square,true,2,15,79.2778,0.0130 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamHowever, if JSON data has map-valued or array-valued fields, Miller gives you choices on how to @@ -843,9 +738,6 @@ We can convert this to CSV, or other tabular formats:
hostname,pid,req.id,req.method,req.path,req.host,req.headers.host,req.headers.user-agent,res.status_code,res.header.content-type,res.header.content-encoding localhost,12345,6789,GET,api/check,foo.bar,bar.baz,browser,200,text,plain -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -863,9 +755,6 @@ req.headers.user-agent browser res.status_code 200 res.header.content-type text res.header.content-encoding plain -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThese transformations are reversible: @@ -897,12 +786,6 @@ These transformations are reversible: } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream See the [flatten/unflatten page](flatten-unflatten.md) for more information. @@ -992,14 +875,9 @@ If you like, you can first copy off your original data somewhere else, before do Lastly, using `tee` within `put`, you can split your input data into separate files per one or more field names: -
+Likewise, if you need to produce CSV which is lacking its header, you can pipe Miller's output to the system command `sed 1d`, or you can use Miller's `--headerless-csv-output` option: @@ -74,9 +68,6 @@ red,square,1,80,0.219668,0.001257,0.792778,2.944117 red,circle,1,84,0.209017,0.290052,0.138103,5.065034 red,square,0,243,0.956274,0.746720,0.775542,7.117831 purple,triangle,0,257,0.435535,0.859129,0.812290,5.753095 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streammlr --csv --from example.csv put -q 'tee > $shape.".csv", $*'--Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -cat circle.csv diff --git a/docs/src/csv-with-and-without-headers.md b/docs/src/csv-with-and-without-headers.md index 7c02477b26..944255e55b 100644 --- a/docs/src/csv-with-and-without-headers.md +++ b/docs/src/csv-with-and-without-headers.md @@ -41,9 +41,6 @@ John,23,present Fred,34,present Alice,56,missing Carol,45,present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamFollowing that, you can rename the positionally indexed labels to names with meaning for your context. For example: @@ -57,9 +54,6 @@ John,23,present Fred,34,present Alice,56,missing Carol,45,present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -88,9 +79,6 @@ red,square,1,80,0.219668,0.001257,0.792778,2.944117 red,circle,1,84,0.209017,0.290052,0.138103,5.065034 red,square,0,243,0.956274,0.746720,0.775542,7.117831 purple,triangle,0,257,0.435535,0.859129,0.812290,5.753095 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamLastly, often we say "CSV" or "TSV" when we have positionally indexed data in columns which are separated by commas or tabs, respectively. In this case it's perhaps simpler to **just use NIDX format** which was designed for this purpose. (See also [File Formats](file-formats.md).) For example: @@ -110,9 +98,6 @@ Lastly, often we say "CSV" or "TSV" when we have positionally indexed data in co 1 Carol 3 present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Headerless CSV with duplicate field values @@ -149,9 +134,6 @@ see something happened: -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream What happened? @@ -180,9 +162,6 @@ One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`: -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Another solution is to use [NIDX format](file-formats.md#nidx-index-numbered-toolkit-style): @@ -199,9 +178,6 @@ Another solution is to use [NIDX format](file-formats.md#nidx-index-numbered-too -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 -Memory profile started. 
-Memory profile finished. -go tool pprof -http=:8080 foo-stream Either way, since there is no explicit header, fields are named `1` through `9`. We can use the @@ -219,9 +195,6 @@ xsn,ysn,x,y,t,a,e29,e31,e32 -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -236,9 +209,6 @@ xsn,ysn,x,y,t,a,e29,e31,e32 -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Regularizing ragged CSV @@ -270,9 +240,6 @@ a,b,c 1,2,3 4,5, 6,7,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream or, more simply, @@ -290,9 +257,6 @@ a,b,c 1,2,3 4,5, 6,7,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream See also the [record-heterogeneity page](record-heterogeneity.md). diff --git a/docs/src/data-cleaning-examples.md b/docs/src/data-cleaning-examples.md index 59906d5f9a..77c08e6808 100644 --- a/docs/src/data-cleaning-examples.md +++ b/docs/src/data-cleaning-examples.md @@ -40,9 +40,6 @@ barney false betty true fred true wilma true -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -54,9 +51,6 @@ barney 0 betty 1 fred 1 wilma 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamA second option is to flag badly formatted data within the output stream: @@ -70,9 +64,6 @@ barney false true betty true true fred true true wilma 1 false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Or perhaps to flag badly formatted data outside the output stream: @@ -89,9 +80,6 @@ betty true fred true wilma 1 Malformed at NR=4 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream A third way is to abort the process on first instance of bad data: diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 99c9f546bf..4a62754030 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -70,9 +70,6 @@ point_longitude -81.707664 line Residential construction Masonry point_granularity 3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream A few simple queries: @@ -91,9 +88,6 @@ BAKER COUNTY 70 BRADFORD COUNTY 31 HAMILTON COUNTY 35 UNION COUNTY 15 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -103,9 +97,6 @@ go tool pprof -http=:8080 foo-stream line count Residential 30838 Commercial 5796 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamCategorization of total insured value: @@ -117,9 +108,6 @@ Categorization of total insured value: tiv_2012_min 73.37 tiv_2012_mean 2571004.0973420837 tiv_2012_max 1701000000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -133,9 +121,6 @@ Wood Residential 73.37 113493.01704925536 649046.12 Reinforced Concrete Commercial 6416016.01 20212428.681839883 60570000 Reinforced Masonry Commercial 1287817.34 4621372.981117158 16650000 Steel Frame Commercial 29790000 133492500 1701000000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -150,9 +135,6 @@ hu_site_deductible_p90 76.5 hu_site_deductible_p95 6829.2 hu_site_deductible_p99 126270 hu_site_deductible_p100 7380000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -171,7 +153,6 @@ BROWARD COUNTY 0 148500 3258900 CALHOUN COUNTY 0 33339.6 33339.6 CHARLOTTE COUNTY 5400 52650 250994.7 CITRUS COUNTY 1332.9 79974.9 483785.1 -Memory profile started.
@@ -184,9 +165,6 @@ tiv_2011_tiv_2012_ols_m 0.9835583980337723 tiv_2011_tiv_2012_ols_b 433854.6428968317 tiv_2011_tiv_2012_ols_n 36634 tiv_2011_tiv_2012_r2 0.9468258417320189 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -229,9 +207,6 @@ tiv_2011_tiv_2012_ols_m 1.2301 tiv_2011_tiv_2012_ols_b -596.6239 tiv_2011_tiv_2012_ols_n 657 tiv_2011_tiv_2012_r2 0.9335 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Color/shape data @@ -266,9 +241,6 @@ red circle 1 84 0.209017 0.290052 0.138103 5.065034 red square 0 243 0.956274 0.746720 0.775542 7.117831 purple triangle 0 257 0.435535 0.859129 0.812290 5.753095 red square 0 322 0.201551 0.953110 0.771991 5.612050 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Look at uncategorized stats (using [creach](https://github.com/johnkerl/scripts/blob/master/fundam/creach) for spacing). @@ -291,9 +263,6 @@ v_min -0.092709 v_mean 0.49778696586624427 v_max 1.0725 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The histogram shows the different distribution of 0/1 flags: @@ -315,9 +284,6 @@ bin_lo bin_hi flag_count u_count v_count 0.8900000000000002 0.9900000000000002 0 995 993 0.9900000000000002 1.0900000000000003 4020 1013 939 1.0900000000000003 1.1900000000000002 0 0 25 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Look at univariate stats by color and shape. In particular, color-dependent flag probabilities pop out, aligning with their original Bernoulli probabilities from the data-generator script: @@ -335,9 +301,6 @@ orange 0 0.5214521452145214 1 0.001235 0.49053241584158375 0.9988 purple 0 0.09019264448336252 1 0.000266 0.49400496322241666 0.999647 0.000364 0.4970507127845888 0.999975 red 0 0.3031674208144796 1 0.000671 0.49255964641241273 0.999882 -0.092709 0.4965350941607402 1.0725 yellow 0 0.8924274593064402 1 0.0013 0.4971291160651098 0.999923 0.000711 0.5106265987261144 0.999919 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -350,9 +313,6 @@ shape flag_min flag_mean flag_max u_min u_mean u_ma circle 0 0.3998456194519491 1 0.000044 0.498554505982246 0.999923 -0.092709 0.49552416171362396 1.0725 square 0 0.39611178614823817 1 0.000188 0.4993854558930749 0.999969 0.000089 0.49653825929526124 0.999975 triangle 0 0.4015421115065243 1 0.000881 0.49685854240806604 0.999661 0.000717 0.5010495260972719 0.999995 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamLook at bivariate stats by color and shape. In particular, `u,v` pairwise correlation for red circles pops out: @@ -363,9 +323,6 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
u_v_corr w_x_corr 0.1334180491027861 -0.011319841199866178 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -393,7 +350,4 @@ orange triangle -0.030456661186085785 -0.1318699981926352 yellow circle -0.06477331572781474 0.07369449819706045 blue circle -0.10234761901929677 -0.030528539069837757 green triangle -0.10901825107358765 -0.04848782060162929 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/date-time-examples.md b/docs/src/date-time-examples.md index 3453f1a6e7..5bcbdac015 100644 --- a/docs/src/date-time-examples.md +++ b/docs/src/date-time-examples.md @@ -40,9 +40,6 @@ we can use [strptime](reference-verbs.md#strptime) to parse the date field into
date,event 2018-03-07,discovery -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamCaveat: localtime-handling in timezones with DST is still a work in progress; see [https://github.com/johnkerl/miller/issues/170](https://github.com/johnkerl/miller/issues/170) . See also [https://github.com/johnkerl/miller/issues/208](https://github.com/johnkerl/miller/issues/208) -- thanks @aborruso! @@ -108,9 +105,6 @@ Then, filter for adjacent difference not being 86400 (the number of seconds in a
n=774,date=2014-04-19,qoh=130140,datestamp=1397865600,datestamp_delta=259200 n=1119,date=2015-03-31,qoh=181625,datestamp=1427760000,datestamp_delta=172800 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamGiven this, it's now easy to see where the gaps are: @@ -130,9 +124,6 @@ n=777,1=2014-04-21,2=130368 n=778,1=2014-04-22,2=130368 n=779,1=2014-04-23,2=130849 n=780,1=2014-04-24,2=131026 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -150,7 +141,4 @@ n=1122,1=2015-04-02,2=181718 n=1123,1=2015-04-03,2=181835 n=1124,1=2015-04-04,2=182104 n=1125,1=2015-04-05,2=182528 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/dkvp-examples.md b/docs/src/dkvp-examples.md index e4e1c8238b..2f3e3b5108 100644 --- a/docs/src/dkvp-examples.md +++ b/docs/src/dkvp-examples.md @@ -147,9 +147,6 @@ eks pan 2 0.522151 ekspan 2.522151 str str int float str float wye wye 3 0.338318 wyewye 3.338318 str str int float str float eks wye 4 0.134188 ekswye 4.134188 str str int float str float wye pan 5 0.863624 wyepan 5.863624 str str int float str float -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## DKVP I/O in Ruby @@ -268,7 +265,4 @@ eks pan 2 0.522151 ekspan 2.522151 String String Integer Float String Float wye wye 3 0.338318 wyewye 3.338318 String String Integer Float String Float eks wye 4 0.134188 ekswye 4.134188 String String Integer Float String Float wye pan 5 0.863624 wyepan 5.863624 String String Integer Float String Float -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md index 0fe20e71b7..8611a7a22e 100644 --- a/docs/src/file-formats.md +++ b/docs/src/file-formats.md @@ -172,9 +172,6 @@ An **array of single-level objects** is, quite simply, **a table**: "shape": "square" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -193,9 +190,6 @@ go tool pprof -http=:8080 foo-stream "v": 0.001257 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSingle-level JSON data goes back and forth between JSON and tabular formats @@ -208,9 +202,6 @@ in the direct way: color u v yellow 0.632170 0.988721 red 0.219668 0.001257 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -228,9 +219,6 @@ purple triangle 0 65 0.684281 0.582372 0.801405 5.805148 yellow circle 1 73 0.603365 0.423708 0.639785 7.006414 yellow circle 1 87 0.285656 0.833516 0.635058 6.350036 purple square 0 91 0.259926 0.824322 0.723735 6.854221 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream### Nested JSON objects @@ -272,9 +260,6 @@ input as well as output in JSON format, JSON structure is preserved throughout t } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream But if the input format is JSON and the output format is not (or vice versa) then key-concatenation applies: @@ -288,9 +273,6 @@ flag i attributes.color attributes.shape values.u values.v values.w values.x 1 15 red square 0.219668 0.001257 0.792778 2.944117 1 16 red circle 0.209017 0.290052 0.138103 5.065034 0 48 red square 0.956274 0.746720 0.775542 7.117831 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream This is discussed in more detail on the page [Flatten/unflatten: JSON vs. tabular formats](flatten-unflatten.md). @@ -337,9 +319,6 @@ Miller handles this: "rate": 0.0130 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -348,9 +327,6 @@ go tool pprof -http=:8080 foo-stream{"color": "yellow", "shape": "triangle", "flag": "true", "k": 1, "index": 11, "quantity": 43.6498, "rate": 9.8870} {"color": "red", "shape": "square", "flag": "true", "k": 2, "index": 15, "quantity": 79.2778, "rate": 0.0130} -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that for _input_ data, either is acceptable: whether you use `--ijson` or `--ijsonl`, Miller @@ -372,9 +348,6 @@ eks,pan,2,0.758679,0.522151 wye,wye,3,0.204603,0.338318 eks,wye,4,0.381399,0.134188 wye,pan,5,0.573288,0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -387,9 +360,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that while Miller is a line-at-a-time processor and retains input lines in memory only where necessary (e.g. for sort), pretty-print output requires it to accumulate all input lines (so that it can compute maximum column widths) before producing any output. This has two consequences: (a) pretty-print output won't work on `tail -f` contexts, where Miller will be waiting for an end-of-file marker which never arrives; (b) pretty-print output for large files is constrained by available machine memory. @@ -411,9 +381,6 @@ For output only (this isn't supported in the input-scanner as of 5.0.0) you can | eks | wye | 4 | 0.381399 | 0.134188 | | wye | pan | 5 | 0.573288 | 0.863624 | +-----+-----+---+----------+----------+ -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Markdown tabular @@ -431,9 +398,6 @@ Markdown format looks like this: | wye | wye | 3 | 0.204603 | 0.338318 | | eks | wye | 4 | 0.381399 | 0.134188 | | wye | pan | 5 | 0.573288 | 0.863624 | -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream which renders like this when dropped into various web tools (e.g. github comments): @@ -522,9 +486,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Such data are easy to generate, e.g. in Ruby with @@ -590,9 +551,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream Example with index-numbered input: @@ -613,9 +571,6 @@ early light 1=oh,2=say,3=can,4=you,5=see 1=by,2=the,3=dawn's 1=early,2=light -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Example with index-numbered input and output: @@ -636,9 +591,6 @@ early light say can the dawn's light -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Data-conversion keystroke-savers @@ -729,9 +681,6 @@ type quantity green 678.12 purple 456.78 orange 123.45 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -743,7 +692,4 @@ type quantity green 678.12 purple 456.78 orange 123.45 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/flatten-unflatten.md b/docs/src/flatten-unflatten.md index 7f12042df6..7a3c138d2f 100644 --- a/docs/src/flatten-unflatten.md +++ b/docs/src/flatten-unflatten.md @@ -103,9 +103,6 @@ Flattened to CSV format: a,b.x,b.y 1,2,3 4,5,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Flattened to pretty-print format: @@ -117,9 +114,6 @@ Flattened to pretty-print format: a b.x b.y 1 2 3 4 5 6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Using flatten-separator `:` instead of the default `.`: @@ -131,9 +125,6 @@ Using flatten-separator `:` instead of the default `.`: a b:x b:y 1 2 3 4 5 6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream If the maps are more deeply nested, each level of map keys is joined in: @@ -159,9 +150,6 @@ If the maps are more deeply nested, each level of map keys is joined in: a b.s.w b.s.x b.t.y b.t.z 1 2 3 4 5 6 7 8 9 10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream **Unflattening** is simply the reverse -- from non-JSON back to JSON: @@ -187,9 +175,6 @@ go tool pprof -http=:8080 foo-stream a,b.x,b.y 1,2,3 4,5,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -212,12 +197,6 @@ go tool pprof -http=:8080 foo-stream } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Converting arrays between JSON and non-JSON @@ -247,9 +226,6 @@ If the input data contains arrays, these are also flattened similarly: the a b.1 b.2 1 2 3 4 5 6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream If the arrays are more deeply nested, each level of arrays keys is joined in: @@ -275,9 +251,6 @@ If the arrays are more deeply nested, each level of arrays keys is joined in: a b.1.1 b.1.2 b.2.1 b.2.2 1 2 3 4 5 6 7 8 9 10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream In the nested-data examples shown here, nested map values are shown containing @@ -307,9 +280,6 @@ though not shown here) nested map values can contain arrays, and vice versa. a,b.1,b.2 1,2,3 4,5,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -326,12 +296,6 @@ go tool pprof -http=:8080 foo-stream "b": [5, 6] } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Auto-inferencing of arrays on unflatten @@ -359,9 +323,6 @@ a.1,a.2,a.3 "a": [4, 5, 6] } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -385,9 +346,6 @@ a.1,a.3,a.5 } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Manual control @@ -435,9 +393,6 @@ Using JSON output, we can see that `splita` has produced an array-valued field n "components": ["nadir", "west", "our", "org"] } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Using CSV output, with default auto-flatten, we get `components.1` through `components.4`: @@ -449,9 +404,6 @@ Using CSV output, with default auto-flatten, we get `components.1` through `comp host,status,components.1,components.2,components.3,components.4 apoapsis.east.our.org,up,apoapsis,east,our,org nadir.west.our.org,down,nadir,west,our,org -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Using CSV output, without default auto-flatten, we get a JSON-stringified encoding of the `components` field: @@ -463,9 +415,6 @@ Using CSV output, without default auto-flatten, we get a JSON-stringified encodi host,status,components apoapsis.east.our.org,up,"[""apoapsis"", ""east"", ""our"", ""org""]" nadir.west.our.org,down,"[""nadir"", ""west"", ""our"", ""org""]" -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Now suppose we ran this @@ -486,9 +435,6 @@ host nadir.west.our.org status down a ["nadir", "west", "our", "org"] b ["nadir", "west", "our", "org"] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream into a file [data/hostnames.xtab](./data/hostnames.xtab): @@ -530,9 +476,6 @@ leave `b` JSON-stringified: "b": "[\"nadir\", \"west\", \"our\", \"org\"]" } ] -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream See also the diff --git a/docs/src/installing-miller.md b/docs/src/installing-miller.md index 926fc94c6a..b5ae44227b 100644 --- a/docs/src/installing-miller.md +++ b/docs/src/installing-miller.md @@ -70,9 +70,6 @@ purple,triangle,false,7,65,80.1405,5.8240 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -90,9 +87,6 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIf you run into issues on these checks, please check out the resources on the [community page](community.md) for help. diff --git a/docs/src/internationalization.md b/docs/src/internationalization.md index 520025c426..5fadcab1ed 100644 --- a/docs/src/internationalization.md +++ b/docs/src/internationalization.md @@ -50,9 +50,6 @@ Support for internationalization includes: κόκκινο κύκλος αληθινό 3 16 13.8103 2.9010 κίτρινο κύκλος αληθινό 8 73 63.9785 4.2370 κίτρινο κύκλος αληθινό 9 87 63.5058 8.3350 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -70,9 +67,6 @@ go tool pprof -http=:8080 foo-stream κόκκινο τετράγωνο ψευδές 6 64 77.1991 9.5310 μοβ τρίγωνο ψευδές 7 65 80.1405 5.8240 μοβ τετράγωνο ψευδές 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -90,7 +84,4 @@ go tool pprof -http=:8080 foo-stream желтый КРУГ истина 8 73 63.9785 4.2370 6 желтый КРУГ истина 9 87 63.5058 8.3350 6 фиолетовый КВАДРАТ ложь 10 91 72.3735 8.2430 10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/keystroke-savers.md b/docs/src/keystroke-savers.md index 709becf14e..1cc2485a12 100644 --- a/docs/src/keystroke-savers.md +++ b/docs/src/keystroke-savers.md @@ -27,9 +27,6 @@ In our examples so far we've often made use of `mlr --icsv --opprint` or `mlr -- color shape flag k index quantity rate yellow triangle true 1 11 43.6498 9.8870 red square true 2 15 79.2778 0.0130 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -56,9 +53,6 @@ go tool pprof -http=:8080 foo-stream "rate": 0.0130 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamYou can get the full list [here](file-formats.md#data-conversion-keystroke-savers). @@ -75,9 +69,6 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -88,9 +79,6 @@ shape quantity square 72.3735 circle 63.5058 circle 63.9785 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIf there's more than one input file, you can use `--mfrom`, then however many file names, then `--` to indicate the end of your input-file-name list: diff --git a/docs/src/log-processing-examples.md b/docs/src/log-processing-examples.md index c835c735ed..ad0b2a333f 100644 --- a/docs/src/log-processing-examples.md +++ b/docs/src/log-processing-examples.md @@ -86,9 +86,6 @@ type hit_mean A1 0.8571428571428571 A4 0.7142857142857143 A9 0.09090909090909091 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -105,9 +102,6 @@ time batch_size num_filtered time_delta num_filtered_delta 2016-09-02T12:35:20Z 100 554 7 61 2016-09-02T12:35:36Z 100 612 16 58 2016-09-02T12:35:42Z 100 728 6 116 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamAlternatively, we can simply group the similar data for a better look: @@ -164,9 +158,6 @@ time batch_size num_filtered 1472819720 100 554 1472819736 100 612 1472819742 100 728 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -221,9 +212,6 @@ time batch_size num_filtered 2016-09-02T12:35:20Z 100 554 2016-09-02T12:35:36Z 100 612 2016-09-02T12:35:42Z 100 728 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Parsing log-file output diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 6c4a7fe2e3..aba4999375 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -23,11 +23,11 @@ MILLER(1) MILLER(1) -NAME +1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such as CSV and tabular JSON. -SYNOPSIS +1mSYNOPSIS0m Usage: mlr [flags] {verb} [verb-dependent options ...] {zero or more file names} @@ -43,16 +43,16 @@ SYNOPSIS https://miller.readthedocs.io -DESCRIPTION +1mDESCRIPTION0m Miller operates on key-value-pair data while the familiar Unix tools operate on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.4.0. + manpage documents mlr 6.5.0. -EXAMPLES +1mEXAMPLES0m mlr --icsv --opprint cat example.csv mlr --icsv --opprint sort -f shape example.csv mlr --icsv --opprint sort -f shape -nr index example.csv @@ -61,7 +61,7 @@ EXAMPLES mlr --icsv --ojson put '$ratio = $quantity / $rate' example.csv mlr --icsv --opprint --from example.csv sort -nr index then cut -f shape,quantity -FILE FORMATS +1mFILE FORMATS0m CSV/CSV-lite: comma-separated values with separate header line TSV: same but with tabs in places of commas +---------------------+ @@ -133,7 +133,7 @@ FILE FORMATS | fox jumped | Record 2: "1":"fox", "2":"jumped" +---------------------+ -HELP OPTIONS +1mHELP OPTIONS0m Type 'mlr help {topic}' for any of the following: Essentials: mlr help topics @@ -189,7 +189,7 @@ HELP OPTIONS Use 'mlr help find ...' 
for approximate (substring) matches, e.g. 'mlr help find map' for all things with "map" in their names. -VERB LIST +1mVERB LIST0m altkv bar bootstrap cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values fraction gap grep group-by group-like having-fields head histogram json-parse @@ -199,7 +199,7 @@ VERB LIST sort sort-within-records split stats1 stats2 step summary tac tail tee template top utf8-to-latin1 unflatten uniq unsparsify -FUNCTION LIST +1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent asserting_array asserting_bool asserting_boolean asserting_empty asserting_empty_map asserting_error asserting_float asserting_int @@ -225,7 +225,7 @@ FUNCTION LIST version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ -COMMENTS-IN-DATA FLAGS +1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as # This is a comment for a CSV file @@ -254,7 +254,7 @@ COMMENTS-IN-DATA FLAGS Ignore commented lines within input, with specified prefix. -COMPRESSED-DATA FLAGS +1mCOMPRESSED-DATA FLAGS0m Miller offers a few different ways to handle reading data files which have been compressed. @@ -307,7 +307,7 @@ COMPRESSED-DATA FLAGS --zin Uncompress zlib within the Miller process. Done by default if file ends in `.z`. -CSV/TSV-ONLY FLAGS +1mCSV/TSV-ONLY FLAGS0m These are flags which are applicable to CSV format. --allow-ragged-csv-input or --ragged or --allow-ragged-tsv-input @@ -338,7 +338,7 @@ CSV/TSV-ONLY FLAGS -N Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. -FILE-FORMAT FLAGS +1mFILE-FORMAT FLAGS0m See the File formats doc page, and or `mlr help file-formats`, for more about file formats Miller supports. @@ -402,7 +402,7 @@ FILE-FORMAT FLAGS -o {format name} Use format name for output data. For example: `-o csv` is the same as `--ocsv`. 
-FLATTEN-UNFLATTEN FLAGS +1mFLATTEN-UNFLATTEN FLAGS0m These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). See the Flatten/unflatten doc page for more information. @@ -423,7 +423,7 @@ FLATTEN-UNFLATTEN FLAGS `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. -FORMAT-CONVERSION KEYSTROKE-SAVER FLAGS +1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m As keystroke-savers for format-conversion you may use the following. The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX, JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown @@ -443,7 +443,7 @@ FORMAT-CONVERSION KEYSTROKE-SAVER FLAGS -p Keystroke-saver for `--nidx --fs space --repifs`. -T Keystroke-saver for `--nidx --fs tab`. -JSON-ONLY FLAGS +1mJSON-ONLY FLAGS0m These are flags which are applicable to JSON output format. --jlistwrap or --jl Wrap JSON output in outermost `[ ]`. This is the @@ -458,7 +458,7 @@ JSON-ONLY FLAGS --no-jvstack Put objects/arrays all on one line for JSON output. This is the default for JSON Lines output format. -LEGACY FLAGS +1mLEGACY FLAGS0m These are flags which don't do anything in the current Miller version. They are accepted as no-op flags in order to keep old scripts from breaking. @@ -489,7 +489,7 @@ LEGACY FLAGS --vflatsep Ignored as of version 6. This functionality is subsumed into JSON formatting. -MISCELLANEOUS FLAGS +1mMISCELLANEOUS FLAGS0m These are flags which don't fit into any other category. --fflush Force buffered output to be written after every output record. The default is flush output after @@ -580,7 +580,7 @@ MISCELLANEOUS FLAGS information please see https://miller.readthedocs.io/en/latest/scripting/. -OUTPUT-COLORIZATION FLAGS +1mOUTPUT-COLORIZATION FLAGS0m Miller uses colors to highlight outputs. You can specify color preferences. 
Note: output colorization does not work on Windows. @@ -661,14 +661,14 @@ OUTPUT-COLORIZATION FLAGS --value-color Specify the color (see `--list-color-codes` and `--list-color-names`) for record values. -PPRINT-ONLY FLAGS +1mPPRINT-ONLY FLAGS0m These are flags which are applicable to PPRINT format. --barred Prints a border around PPRINT output (not available for input). --right Right-justifies all fields for PPRINT output. -PROFILING FLAGS +1mPROFILING FLAGS0m These are flags for profiling Miller performance. --cpuprofile {CPU-profile file name} Create a CPU-profile file for performance analysis. @@ -682,7 +682,7 @@ PROFILING FLAGS must be the very first thing after 'mlr' on the command line. -SEPARATOR FLAGS +1mSEPARATOR FLAGS0m See the Separators doc page for more about record separators, field separators, and pair separators. Also see the File formats doc page, or `mlr help file-formats`, for more about the file formats Miller supports. @@ -791,7 +791,7 @@ SEPARATOR FLAGS spaces. --rs {string} Specify RS for input and output. -AUXILIARY COMMANDS +1mAUXILIARY COMMANDS0m Available subcommands: aux-list hex @@ -804,7 +804,7 @@ AUXILIARY COMMANDS version For more information, please invoke mlr {subcommand} --help. -MLRRC +1mMLRRC0m You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc. For example, if you usually process CSV, then you can put "--csv" in your .mlrrc file and that will be the default input/output format unless otherwise specified on the command line. @@ -838,7 +838,7 @@ MLRRC See also: https://miller.readthedocs.io/en/latest/customization.html -REPL +1mREPL0m Usage: mlr repl [options] {zero or more data-file names} -v Prints the expressions's AST (abstract syntax tree), which gives full transparency on the precedence and associativity rules of @@ -868,14 +868,14 @@ REPL Any data-file names are opened just as if you had waited and typed :open {filenames} at the Miller REPL prompt. 
-VERBS - altkv +1mVERBS0m + 1maltkv0m Usage: mlr altkv [options] Given fields with values of the form a,b,c,d,e,f emits a=b,c=d,e=f pairs. Options: -h|--help Show this message. - bar + 1mbar0m Usage: mlr bar [options] Replaces a numeric field with a number of asterisks, allowing for cheesy bar plots. These align best with --opprint or --oxtab output format. @@ -893,7 +893,7 @@ VERBS However you can make them all longer if you so desire. -h|--help Show this message. - bootstrap + 1mbootstrap0m Usage: mlr bootstrap [options] Emits an n-sample, with replacement, of the input records. See also mlr sample and mlr shuffle. @@ -902,7 +902,7 @@ VERBS Must be non-negative. -h|--help Show this message. - cat + 1mcat0m Usage: mlr cat [options] Passes input records directly to output. Most useful for format conversion. Options: @@ -913,14 +913,14 @@ VERBS --filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. - check + 1mcheck0m Usage: mlr check [options] Consumes records without printing any output. Useful for doing a well-formatted check on input data. Options: -h|--help Show this message. - clean-whitespace + 1mclean-whitespace0m Usage: mlr clean-whitespace [options] For each record, for each field in the record, whitespace-cleans the keys and/or values. Whitespace-cleaning entails stripping leading and trailing whitespace, @@ -935,7 +935,7 @@ VERBS leave off -k as well as -v. -h|--help Show this message. - count-distinct + 1mcount-distinct0m Usage: mlr count-distinct [options] Prints number of records having distinct values for specified field names. Same as uniq -c. @@ -951,7 +951,7 @@ VERBS for distinct a field values and counts for distinct b field values separately. - count + 1mcount0m Usage: mlr count [options] Prints number of records, optionally grouped by distinct values for specified field names. Options: @@ -960,7 +960,7 @@ VERBS -o {name} Field name for output-count. Default "count". -h|--help Show this message. 
- count-similar + 1mcount-similar0m Usage: mlr count-similar [options] Ingests all records, then emits each record augmented by a count of the number of other records having the same group-by field values. @@ -969,7 +969,7 @@ VERBS -o {name} Field name for output-counts. Defaults to "count". -h|--help Show this message. - cut + 1mcut0m Usage: mlr cut [options] Passes through input records with specified fields included/excluded. Options: @@ -989,7 +989,7 @@ VERBS mlr cut -r -f '^status$,"sda[0-9]"' mlr cut -r -f '^status$,"sda[0-9]"i' (this is case-insensitive) - decimate + 1mdecimate0m Usage: mlr decimate [options] Passes through one of every n records, optionally by category. Options: @@ -999,7 +999,7 @@ VERBS -n {n} Decimation factor (default 10). -h|--help Show this message. - fill-down + 1mfill-down0m Usage: mlr fill-down [options] If a given record has a missing value for a given field, fill that from the corresponding value from a previous record, if any. @@ -1015,14 +1015,14 @@ VERBS -f Field names for fill-down. -h|--help Show this message. - fill-empty + 1mfill-empty0m Usage: mlr fill-empty [options] Fills empty-string fields with specified fill-value. Options: -v {string} Fill-value: defaults to "N/A" -S Don't infer type -- so '-v 0' would fill string 0 not int 0. - filter + 1mfilter0m Usage: mlr filter [options] {DSL expression} Options: -f {file name} File containing a DSL expression (see examples below). If the filename @@ -1107,7 +1107,7 @@ VERBS See also https://miller.readthedocs.io/reference-dsl for more context. - flatten + 1mflatten0m Usage: mlr flatten [options] Flattens multi-level maps to single-level ones. Example: field with name 'a' and value '{"b": { "c": 4 }}' becomes name 'a.b.c' and value 4. @@ -1116,7 +1116,7 @@ VERBS -s Separator, defaulting to mlr --flatsep value. -h|--help Show this message. 
- format-values + 1mformat-values0m Usage: mlr format-values [options] Applies format strings to all field values, depending on autodetected type. * If a field value is detected to be integer, applies integer format. @@ -1147,7 +1147,7 @@ VERBS -n Coerce field values autodetected as int to float, and then apply the float format. - fraction + 1mfraction0m Usage: mlr fraction [options] For each record's value in specified fields, computes the ratio of that value to the sum of values in that field over all input records. @@ -1169,7 +1169,7 @@ VERBS x=1,x_cumulative_fraction=0.1 x=2,x_cumulative_fraction=0.3 x=3,x_cumulative_fraction=0.6 and x=4,x_cumulative_fraction=1.0 - gap + 1mgap0m Usage: mlr gap [options] Emits an empty record every n records, or when certain values change. Options: @@ -1180,7 +1180,7 @@ VERBS -n is ignored if -g is present. -h|--help Show this message. - grep + 1mgrep0m Usage: mlr grep [options] {regular expression} Passes through records which match the regular expression. Options: @@ -1199,18 +1199,18 @@ VERBS features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." - group-by + 1mgroup-by0m Usage: mlr group-by [options] {comma-separated field names} Outputs records in batches having identical values at specified field names.Options: -h|--help Show this message. - group-like + 1mgroup-like0m Usage: mlr group-like [options] Outputs records in batches having identical field names. Options: -h|--help Show this message. - having-fields + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. Options: @@ -1226,7 +1226,7 @@ VERBS mlr having-fields --any-matching '"sda[0-9]"' mlr having-fields --any-matching '"sda[0-9]"i' (this is case-insensitive) - head + 1mhead0m Usage: mlr head [options] Passes through the first n records, optionally by category. Without -g, ceases consuming more input (i.e. is fast) when n records have been read. 
@@ -1235,7 +1235,7 @@ VERBS -n {n} Head-count to print. Default 10. -h|--help Show this message. - histogram + 1mhistogram0m Just a histogram. Input values < lo or > hi are not counted. Usage: mlr histogram [options] -f {a,b,c} Value-field names for histogram counts @@ -1247,14 +1247,14 @@ VERBS -o {prefix} Prefix for output field name. Default: no prefix. -h|--help Show this message. - json-parse + 1mjson-parse0m Usage: mlr json-parse [options] Tries to convert string field values to parsed JSON, e.g. "[1,2,3]" -> [1,2,3]. Options: -f {...} Comma-separated list of field names to json-parse (default all). -h|--help Show this message. - json-stringify + 1mjson-stringify0m Usage: mlr json-stringify [options] Produces string field values from field-value data, e.g. [1,2,3] -> "[1,2,3]". Options: @@ -1263,7 +1263,7 @@ VERBS --no-jvstack Produce single-line JSON output per record (default). -h|--help Show this message. - join + 1mjoin0m Usage: mlr join [options] Joins records from specified left file name with records from all file names at the end of the Miller argument list. @@ -1316,7 +1316,7 @@ VERBS Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information including examples. - label + 1mlabel0m Usage: mlr label [options] {new1,new2,new3,...} Given n comma-separated names, renames the first n fields of each record to have the respective name. (Fields past the nth are left with their original @@ -1326,14 +1326,14 @@ VERBS Options: -h|--help Show this message. - latin1-to-utf8 + 1mlatin1-to-utf80m Usage: mlr latin1-to-utf8, with no options. Recursively converts record strings from Latin-1 to UTF-8. For field-level control, please see the latin1_to_utf8 DSL function. Options: -h|--help Show this message. - least-frequent + 1mleast-frequent0m Usage: mlr least-frequent [options] Shows the least frequently occurring distinct values for specified field names. 
The first entry is the statistical anti-mode; the remaining are runners-up. @@ -1344,7 +1344,7 @@ VERBS -o {name} Field name for output count. Default "count". See also "mlr most-frequent". - merge-fields + 1mmerge-fields0m Usage: mlr merge-fields [options] Computes univariate statistics for each input record, accumulated across specified fields. @@ -1393,7 +1393,7 @@ VERBS since "a_in_x" and "a_out_x" both collapse to "a_x", "b_in_y" collapses to "b_y", and "b_out_x" collapses to "b_x". - most-frequent + 1mmost-frequent0m Usage: mlr most-frequent [options] Shows the most frequently occurring distinct values for specified field names. The first entry is the statistical mode; the remaining are runners-up. @@ -1404,7 +1404,7 @@ VERBS -o {name} Field name for output count. Default "count". See also "mlr least-frequent". - nest + 1mnest0m Usage: mlr nest [options] Explodes specified field values into separate fields/records, or reverses this. Options: @@ -1453,14 +1453,14 @@ VERBS e.g. by default the former is semicolon and the latter is comma. See also mlr reshape. - nothing + 1mnothing0m Usage: mlr nothing [options] Drops all input records. Useful for testing, or after tee/print/etc. have produced other output. Options: -h|--help Show this message. - put + 1mput0m Usage: mlr put [options] {DSL expression} Options: -f {file name} File containing a DSL expression (see examples below). If the filename @@ -1540,19 +1540,19 @@ VERBS See also https://miller.readthedocs.io/reference-dsl for more context. - regularize + 1mregularize0m Usage: mlr regularize [options] Outputs records sorted lexically ascending by keys. Options: -h|--help Show this message. - remove-empty-columns + 1mremove-empty-columns0m Usage: mlr remove-empty-columns [options] Omits fields which are empty on every input row. Non-streaming. Options: -h|--help Show this message. - rename + 1mrename0m Usage: mlr rename [options] {old1,new1,old2,new2,...} Renames specified fields. 
Options: @@ -1575,7 +1575,7 @@ VERBS mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. to "Name" - reorder + 1mreorder0m Usage: mlr reorder [options] Moves specified names to start of record, or end of record. Options: @@ -1593,7 +1593,7 @@ VERBS mlr reorder -f a,b sends input record "d=4,b=2,a=1,c=3" to "a=1,b=2,d=4,c=3". mlr reorder -e -f a,b sends input record "d=4,b=2,a=1,c=3" to "d=4,c=3,a=1,b=2". - repeat + 1mrepeat0m Usage: mlr repeat [options] Copies input records to output records multiple times. Options must be exactly one of the following: @@ -1620,7 +1620,7 @@ VERBS a=1,b=2,c=3 a=1,b=2,c=3 - reshape + 1mreshape0m Usage: mlr reshape [options] Wide-to-long options: -i {input field names} -o {key-field name,value-field name} @@ -1679,7 +1679,7 @@ VERBS 2009-01-03 0.98012375 1.3179287 See also mlr nest. - sample + 1msample0m Usage: mlr sample [options] Reservoir sampling (subsampling without replacement), optionally by category. See also mlr bootstrap and mlr shuffle. @@ -1688,7 +1688,7 @@ VERBS -k {k} Required: number of records to output in total, or by group if using -g. -h|--help Show this message. - sec2gmtdate + 1msec2gmtdate0m Usage: ../c/mlr sec2gmtdate {comma-separated list of field names} Replaces a numeric field representing seconds since the epoch with the corresponding GMT year-month-day timestamp; leaves non-numbers as-is. @@ -1697,7 +1697,7 @@ VERBS is the same as ../c/mlr put '$time1=sec2gmtdate($time1);$time2=sec2gmtdate($time2)' - sec2gmt + 1msec2gmt0m Usage: mlr sec2gmt [options] {comma-separated list of field names} Replaces a numeric field representing seconds since the epoch with the corresponding GMT timestamp; leaves non-numbers as-is. This is nothing @@ -1712,7 +1712,7 @@ VERBS --nanos Input numbers are treated as nanoseconds since the epoch. -h|--help Show this message. 
- seqgen + 1mseqgen0m Usage: mlr seqgen [options] Passes input records directly to output. Most useful for format conversion. Produces a sequence of counters. Discards the input record stream. Produces @@ -1728,21 +1728,21 @@ VERBS stop, and step are all integers. Step may be negative. It may not be zero unless start == stop. - shuffle + 1mshuffle0m Usage: mlr shuffle [options] Outputs records randomly permuted. No output records are produced until all input records are read. See also mlr bootstrap and mlr sample. Options: -h|--help Show this message. - skip-trivial-records + 1mskip-trivial-records0m Usage: mlr skip-trivial-records [options] Passes through all records except those with zero fields, or those for which all fields have empty value. Options: -h|--help Show this message. - sort + 1msort0m Usage: mlr sort {flags} Sorts records primarily by the first specified field, secondarily by the second field, and so on. (Any records not having all specified sort keys will appear @@ -1767,14 +1767,14 @@ VERBS which is the same as: mlr sort -f a -f b -nr x -nr y -nr z - sort-within-records + 1msort-within-records0m Usage: mlr sort-within-records [options] Outputs records sorted lexically ascending by keys. Options: -r Recursively sort subobjects/submaps, e.g. for JSON input. -h|--help Show this message. - split + 1msplit0m Usage: mlr split [options] {filename} Options: -n {n}: Cap file sizes at N records. @@ -1814,7 +1814,7 @@ VERBS See also the "tee" DSL function which lets you do more ad-hoc customization. - stats1 + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across the input record stream. @@ -1877,7 +1877,7 @@ VERBS In particular, 1 and 1.0 are distinct text for count and mode. * When there are mode ties, the first-encountered datum wins. 
- stats2 + 1mstats20m Usage: mlr stats2 [options] Computes bivariate statistics for one or more given field-name pairs, accumulated across the input record stream. @@ -1905,7 +1905,7 @@ VERBS Example: mlr stats2 -a linreg-ols,r2 -f x,y -g size,shape Example: mlr stats2 -a corr -f x,y - step + 1mstep0m Usage: mlr step [options] Computes values dependent on earlier/later records, optionally grouped by category. Options: @@ -1948,7 +1948,7 @@ VERBS https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. - summary + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. @@ -1977,7 +1977,7 @@ VERBS uof upper outer fence: p75 + 3.0 * iqr Default summarizers: - field_type count mean min median max null_count distinct_count + field_type count mean min max null_count distinct_count Notes: * min, p25, median, p75, and max work for strings as well as numbers @@ -1990,13 +1990,13 @@ VERBS --all Use all available summarizers. -h|--help Show this message. - tac + 1mtac0m Usage: mlr tac [options] Prints records in reverse order from the order in which they were encountered. Options: -h|--help Show this message. - tail + 1mtail0m Usage: mlr tail [options] Passes through the last n records, optionally by category. Options: @@ -2004,7 +2004,7 @@ VERBS -n {n} Head-count to print. Default 10. -h|--help Show this message. - tee + 1mtee0m Usage: mlr tee [options] {filename} Options: -a Append to existing file, if any, rather than overwriting. @@ -2016,7 +2016,7 @@ VERBS -h|--help Show this message. - template + 1mtemplate0m Usage: mlr template [options] Places input-record fields in the order specified by list of column names. If the input record is missing a specified field, it will be filled with the fill-with. @@ -2031,7 +2031,7 @@ VERBS * Input record is c=3,a=1,f=6. * Output record is a=1,b=,c=3. - top + 1mtop0m Usage: mlr top [options] -f {a,b,c} Value-field names for top counts. 
-g {d,e,f} Optional group-by-field names for top counts. @@ -2049,14 +2049,14 @@ VERBS from -f, fields from -g, and the top-index field are emitted. For more information please see https://miller.readthedocs.io/en/latest/reference-verbs#top - utf8-to-latin1 + 1mutf8-to-latin10m Usage: mlr utf8-to-latin1, with no options. Recursively converts record strings from Latin-1 to UTF-8. For field-level control, please see the utf8_to_latin1 DSL function. Options: -h|--help Show this message. - unflatten + 1munflatten0m Usage: mlr unflatten [options] Reverses flatten. Example: field with name 'a.b.c' and value 4 becomes name 'a' and value '{"b": { "c": 4 }}'. @@ -2065,7 +2065,7 @@ VERBS -s {string} Separator, defaulting to mlr --flatsep value. -h|--help Show this message. - uniq + 1muniq0m Usage: mlr uniq [options] Prints distinct values for specified field names. With -c, same as count-distinct. For uniq, -f is a synonym for -g. @@ -2080,7 +2080,7 @@ VERBS With -n, produces only one record which is the unique-record count. With neither -c nor -n, produces unique records. - unsparsify + 1munsparsify0m Usage: mlr unsparsify [options] Prints records with the union of field names over all input records. For field names absent in a given record but present in others, fills in @@ -2095,239 +2095,239 @@ VERBS being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and 'a=,b=3,c=4'. -FUNCTIONS FOR FILTER/PUT - abs +1mFUNCTIONS FOR FILTER/PUT0m + 1mabs0m (class=math #args=1) Absolute value. - acos + 1macos0m (class=math #args=1) Inverse trigonometric cosine. - acosh + 1macosh0m (class=math #args=1) Inverse hyperbolic cosine. - any + 1many0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. 
For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: Array example: any([10,20,30], func(e) {return $index == e}) Map example: any({"a": "foo", "b": "bar"}, func(k,v) {return $[k] == v}) - append + 1mappend0m (class=collections #args=2) Appends second argument to end of first argument, which must be an array. - apply + 1mapply0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, applies the function to each element of the array/map. For arrays, the function should take one argument, for array element; it should return a new element. For maps, it should take two arguments, for map-element key and value; it should return a new key-value pair (i.e. a single-entry map). Examples: Array example: apply([1,2,3,4,5], func(e) {return e ** 3}) returns [1, 8, 27, 64, 125]. Map example: apply({"a":1, "b":3, "c":5}, func(k,v) {return {toupper(k): v ** 2}}) returns {"A": 1, "B":9, "C": 25}", - arrayify + 1marrayify0m (class=collections #args=1) Walks through a nested map/array, converting any map with consecutive keys "1", "2", ... into an array. Useful to wrap the output of unflatten. - asin + 1masin0m (class=math #args=1) Inverse trigonometric sine. - asinh + 1masinh0m (class=math #args=1) Inverse hyperbolic sine. - asserting_absent + 1masserting_absent0m (class=typing #args=1) Aborts with an error if is_absent on the argument returns false, else returns its argument. - asserting_array + 1masserting_array0m (class=typing #args=1) Aborts with an error if is_array on the argument returns false, else returns its argument. - asserting_bool + 1masserting_bool0m (class=typing #args=1) Aborts with an error if is_bool on the argument returns false, else returns its argument. 
- asserting_boolean + 1masserting_boolean0m (class=typing #args=1) Aborts with an error if is_boolean on the argument returns false, else returns its argument. - asserting_empty + 1masserting_empty0m (class=typing #args=1) Aborts with an error if is_empty on the argument returns false, else returns its argument. - asserting_empty_map + 1masserting_empty_map0m (class=typing #args=1) Aborts with an error if is_empty_map on the argument returns false, else returns its argument. - asserting_error + 1masserting_error0m (class=typing #args=1) Aborts with an error if is_error on the argument returns false, else returns its argument. - asserting_float + 1masserting_float0m (class=typing #args=1) Aborts with an error if is_float on the argument returns false, else returns its argument. - asserting_int + 1masserting_int0m (class=typing #args=1) Aborts with an error if is_int on the argument returns false, else returns its argument. - asserting_map + 1masserting_map0m (class=typing #args=1) Aborts with an error if is_map on the argument returns false, else returns its argument. - asserting_nonempty_map + 1masserting_nonempty_map0m (class=typing #args=1) Aborts with an error if is_nonempty_map on the argument returns false, else returns its argument. - asserting_not_array + 1masserting_not_array0m (class=typing #args=1) Aborts with an error if is_not_array on the argument returns false, else returns its argument. - asserting_not_empty + 1masserting_not_empty0m (class=typing #args=1) Aborts with an error if is_not_empty on the argument returns false, else returns its argument. - asserting_not_map + 1masserting_not_map0m (class=typing #args=1) Aborts with an error if is_not_map on the argument returns false, else returns its argument. - asserting_not_null + 1masserting_not_null0m (class=typing #args=1) Aborts with an error if is_not_null on the argument returns false, else returns its argument. 
- asserting_null + 1masserting_null0m (class=typing #args=1) Aborts with an error if is_null on the argument returns false, else returns its argument. - asserting_numeric + 1masserting_numeric0m (class=typing #args=1) Aborts with an error if is_numeric on the argument returns false, else returns its argument. - asserting_present + 1masserting_present0m (class=typing #args=1) Aborts with an error if is_present on the argument returns false, else returns its argument. - asserting_string + 1masserting_string0m (class=typing #args=1) Aborts with an error if is_string on the argument returns false, else returns its argument. - atan + 1matan0m (class=math #args=1) One-argument arctangent. - atan2 + 1matan20m (class=math #args=2) Two-argument arctangent. - atanh + 1matanh0m (class=math #args=1) Inverse hyperbolic tangent. - bitcount + 1mbitcount0m (class=arithmetic #args=1) Count of 1-bits. - boolean + 1mboolean0m (class=conversion #args=1) Convert int/float/bool/string to boolean. - capitalize + 1mcapitalize0m (class=string #args=1) Convert string's first character to uppercase. - cbrt + 1mcbrt0m (class=math #args=1) Cube root. - ceil + 1mceil0m (class=math #args=1) Ceiling: nearest integer at or above. - clean_whitespace + 1mclean_whitespace0m (class=string #args=1) Same as collapse_whitespace and strip. - collapse_whitespace + 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. - concat + 1mconcat0m (class=collections #args=variadic) Returns the array concatenation of the arguments. Non-array arguments are treated as single-element arrays. Examples: concat(1,2,3) is [1,2,3] concat([1,2],3) is [1,2,3] concat([1,2],[3]) is [1,2,3] - cos + 1mcos0m (class=math #args=1) Trigonometric cosine. - cosh + 1mcosh0m (class=math #args=1) Hyperbolic cosine. - depth + 1mdepth0m (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0. 
- dhms2fsec + 1mdhms2fsec0m (class=time #args=1) Recovers floating-point seconds as in dhms2fsec("5d18h53m20.250000s") = 500000.250000 - dhms2sec + 1mdhms2sec0m (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000 - erf + 1merf0m (class=math #args=1) Error function. - erfc + 1merfc0m (class=math #args=1) Complementary error function. - every + 1mevery0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for every array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: Array example: every(["a", "b", "c"], func(e) {return $[e] >= 0}) Map example: every({"a": "foo", "b": "bar"}, func(k,v) {return $[k] == v}) - exec + 1mexec0m (class=system #args=variadic) '$output = exec( "command", ["arg1", "arg2"], {"env": ["ENV_VAR=ENV_VALUE", "ENV_VAR2=ENV_VALUE2"], "dir": "/tmp/run_command_here", "stdin_string": "this is input fed to program", "combined_output": true )' Run a command via executable, path, args and environment, yielding its stdout minus final carriage return. Example: exec("echo", ["I don't do", "$SHELL things"], {"env": "SHELL=sh"}) outputs "I don't do $SHELL things" - exp + 1mexp0m (class=math #args=1) Exponential function e**x. - expm1 + 1mexpm10m (class=math #args=1) e**x - 1. - flatten + 1mflatten0m (class=collections #args=2,3) Flattens multi-level maps to single-level ones. Useful for nested JSON-like structures for non-JSON file formats like CSV. With two arguments, the first argument is a map (maybe $*) and the second argument is the flatten separator. With three arguments, the first argument is prefix, the second is the flatten separator, and the third argument is a map; flatten($*, ".") is the same as flatten("", ".", $*). 
See "Flatten/unflatten: converting between JSON and tabular formats" at https://miller.readthedocs.io for more information. Examples: flatten({"a":[1,2],"b":3}, ".") is {"a.1": 1, "a.2": 2, "b": 3}. flatten("a", ".", {"b": { "c": 4 }}) is {"a.b.c" : 4}. flatten("", ".", {"a": { "b": 3 }}) is {"a.b" : 3}. - float + 1mfloat0m (class=conversion #args=1) Convert int/float/bool/string to float. - floor + 1mfloor0m (class=math #args=1) Floor: nearest integer at or below. - fmtifnum + 1mfmtifnum0m (class=conversion #args=2) Identical to fmtnum, except returns the first argument as-is if the output would be an error. Examples: fmtifnum(3.4, "%.6f") gives 3.400000" fmtifnum("abc", "%.6f") gives abc" $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone - fmtnum + 1mfmtnum0m (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. Example: $x = fmtnum($x, "%.6f") - fold + 1mfold0m (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument. Examples: Array example: fold([1,2,3,4,5], func(acc,e) {return acc + e**3}, 10000) returns 10225. Map example: fold({"a":1, "b":3, "c": 5}, func(acck,accv,ek,ev) {return {"sum": accv+ev**2}}, {"sum":10000}) returns 10035. 
- format + 1mformat0m (class=string #args=variadic) Using first argument as format string, interpolate remaining arguments in place of each "{}" in the format string. Too-few arguments are treated as the empty string; too-many arguments are discarded. Examples: format("{}:{}:{}", 1,2) gives "1:2:". format("{}:{}:{}", 1,2,3) gives "1:2:3". format("{}:{}:{}", 1,2,3,4) gives "1:2:3". - fsec2dhms + 1mfsec2dhms0m (class=time #args=1) Formats floating-point seconds as in fsec2dhms(500000.25) = "5d18h53m20.250000s" - fsec2hms + 1mfsec2hms0m (class=time #args=1) Formats floating-point seconds as in fsec2hms(5000.25) = "01:23:20.250000" - get_keys + 1mget_keys0m (class=collections #args=1) Returns array of keys of map or array - get_values + 1mget_values0m (class=collections #args=1) Returns array of values of map or array -- in the latter case, returns a copy of the array - gmt2localtime + 1mgmt2localtime0m (class=time #args=1,2) Convert from a GMT-time string to a local-time string. Consulting $TZ unless second argument is supplied. Examples: gmt2localtime("1999-12-31T22:00:00Z") = "2000-01-01 00:00:00" with TZ="Asia/Istanbul" gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" - gmt2sec + 1mgmt2sec0m (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch. Example: gmt2sec("2001-02-03T04:05:06Z") = 981173106 - gssub + 1mgssub0m (class=string #args=3) Like gsub but does no regexing. No characters are special. Example: gssub("ab.d.fg", ".", "X") gives "abXdXfg" - gsub + 1mgsub0m (class=string #args=3) '$name = gsub($name, "old", "new")': replace all, with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to gsub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. 
Examples: gsub("ababab", "ab", "XY") gives "XYXYXY" @@ -2336,244 +2336,244 @@ FUNCTIONS FOR FILTER/PUT gsub("abcdefg", "[ce]", "X") gives "abXdXfg" gsub("prefix4529:suffix8567", "(....ix)([0-9]+)", "[\1 : \2]") gives "[prefix : 4529]:[suffix : 8567]" - haskey + 1mhaskey0m (class=collections #args=2) True/false if map has/hasn't key, e.g. 'haskey($*, "a")' or 'haskey(mymap, mykey)', or true/false if array index is in bounds / out of bounds. Error if 1st argument is not a map or array. Note -n..-1 alias to 1..n in Miller arrays. - hexfmt + 1mhexfmt0m (class=conversion #args=1) Convert int to hex string, e.g. 255 to "0xff". - hms2fsec + 1mhms2fsec0m (class=time #args=1) Recovers floating-point seconds as in hms2fsec("01:23:20.250000") = 5000.250000 - hms2sec + 1mhms2sec0m (class=time #args=1) Recovers integer seconds as in hms2sec("01:23:20") = 5000 - hostname + 1mhostname0m (class=system #args=0) Returns the hostname as a string. - int + 1mint0m (class=conversion #args=1) Convert int/float/bool/string to int. - invqnorm + 1minvqnorm0m (class=math #args=1) Inverse of normal cumulative distribution function. Note that invqorm(urand()) is normally distributed. - is_absent + 1mis_absent0m (class=typing #args=1) False if field is present in input, true otherwise - is_array + 1mis_array0m (class=typing #args=1) True if argument is an array. - is_bool + 1mis_bool0m (class=typing #args=1) True if field is present with boolean value. Synonymous with is_boolean. - is_boolean + 1mis_boolean0m (class=typing #args=1) True if field is present with boolean value. Synonymous with is_bool. - is_empty + 1mis_empty0m (class=typing #args=1) True if field is present in input with empty string value, false otherwise. - is_empty_map + 1mis_empty_map0m (class=typing #args=1) True if argument is a map which is empty. - is_error + 1mis_error0m (class=typing #args=1) True if if argument is an error, such as taking string length of an integer. 
- is_float + 1mis_float0m (class=typing #args=1) True if field is present with value inferred to be float - is_int + 1mis_int0m (class=typing #args=1) True if field is present with value inferred to be int - is_map + 1mis_map0m (class=typing #args=1) True if argument is a map. - is_nan + 1mis_nan0m (class=typing #args=1) True if the argument is the NaN (not-a-number) floating-point value. Note that NaN has the property that NaN != NaN, so you need 'is_nan(x)' rather than 'x == NaN'. - is_nonempty_map + 1mis_nonempty_map0m (class=typing #args=1) True if argument is a map which is non-empty. - is_not_array + 1mis_not_array0m (class=typing #args=1) True if argument is not an array. - is_not_empty + 1mis_not_empty0m (class=typing #args=1) True if field is present in input with non-empty value, false otherwise - is_not_map + 1mis_not_map0m (class=typing #args=1) True if argument is not a map. - is_not_null + 1mis_not_null0m (class=typing #args=1) False if argument is null (empty, absent, or JSON null), true otherwise. - is_null + 1mis_null0m (class=typing #args=1) True if argument is null (empty, absent, or JSON null), false otherwise. - is_numeric + 1mis_numeric0m (class=typing #args=1) True if field is present with value inferred to be int or float - is_present + 1mis_present0m (class=typing #args=1) True if field is present in input, false otherwise. - is_string + 1mis_string0m (class=typing #args=1) True if field is present with string (including empty-string) value - joink + 1mjoink0m (class=conversion #args=2) Makes string from map/array keys. First argument is map/array; second is separator string. Examples: joink({"a":3,"b":4,"c":5}, ",") = "a,b,c". joink([1,2,3], ",") = "1,2,3". - joinkv + 1mjoinkv0m (class=conversion #args=3) Makes string from map/array key-value pairs. First argument is map/array; second is pair-separator string; third is field-separator string. Mnemonic: the "=" comes before the "," in the output and in the arguments to joinkv. 
Examples: joinkv([3,4,5], "=", ",") = "1=3,2=4,3=5" joinkv({"a":3,"b":4,"c":5}, ":", ";") = "a:3;b:4;c:5" - joinv + 1mjoinv0m (class=conversion #args=2) Makes string from map/array values. First argument is map/array; second is separator string. Examples: joinv([3,4,5], ",") = "3,4,5" joinv({"a":3,"b":4,"c":5}, ",") = "3,4,5" - json_parse + 1mjson_parse0m (class=collections #args=1) Converts value from JSON-formatted string. - json_stringify + 1mjson_stringify0m (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. - latin1_to_utf8 + 1mlatin1_to_utf80m (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it. Examples: $y = latin1_to_utf8($x) $* = latin1_to_utf8($*) - leafcount + 1mleafcount0m (class=collections #args=1) Counts total number of terminal values in map/array. For single-level map/array, same as length. - length + 1mlength0m (class=collections #args=1) Counts number of top-level entries in array/map. Scalars have length 1. - localtime2gmt + 1mlocaltime2gmt0m (class=time #args=1,2) Convert from a local-time string to a GMT-time string. Consults $TZ unless second argument is supplied. Examples: localtime2gmt("2000-01-01 00:00:00") = "1999-12-31T22:00:00Z" with TZ="Asia/Istanbul" localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" - localtime2sec + 1mlocaltime2sec0m (class=time #args=1,2) Parses local timestamp as integer seconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. Examples: localtime2sec("2001-02-03 04:05:06") = 981165906 with TZ="Asia/Istanbul" localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906" - log + 1mlog0m (class=math #args=1) Natural (base-e) logarithm. - log10 + 1mlog100m (class=math #args=1) Base-10 logarithm. 
- log1p + 1mlog1p0m (class=math #args=1) log(1-x). - logifit + 1mlogifit0m (class=math #args=3) Given m and b from logistic regression, compute fit: $yhat=logifit($x,$m,$b). - lstrip + 1mlstrip0m (class=string #args=1) Strip leading whitespace from string. - madd + 1mmadd0m (class=arithmetic #args=3) a + b mod m (integers) - mapdiff + 1mmapdiff0m (class=collections #args=variadic) With 0 args, returns empty map. With 1 arg, returns copy of arg. With 2 or more, returns copy of arg 1 with all keys from any of remaining argument maps removed. - mapexcept + 1mmapexcept0m (class=collections #args=variadic) Returns a map with keys from remaining arguments, if any, unset. Remaining arguments can be strings or arrays of string. E.g. 'mapexcept({1:2,3:4,5:6}, 1, 5, 7)' is '{3:4}' and 'mapexcept({1:2,3:4,5:6}, [1, 5, 7])' is '{3:4}'. - mapselect + 1mmapselect0m (class=collections #args=variadic) Returns a map with only keys from remaining arguments set. Remaining arguments can be strings or arrays of string. E.g. 'mapselect({1:2,3:4,5:6}, 1, 5, 7)' is '{1:2,5:6}' and 'mapselect({1:2,3:4,5:6}, [1, 5, 7])' is '{1:2,5:6}'. - mapsum + 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. - max + 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. - md5 + 1mmd50m (class=hashing #args=1) MD5 hash. - mexp + 1mmexp0m (class=arithmetic #args=3) a ** b mod m (integers) - min + 1mmin0m (class=math #args=variadic) Min of n numbers; null loses. - mmul + 1mmmul0m (class=arithmetic #args=3) a * b mod m (integers) - msub + 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) - os + 1mos0m (class=system #args=0) Returns the operating-system name as a string. - pow + 1mpow0m (class=arithmetic #args=2) Exponentiation. Same as **, but as a function. 
- qnorm + 1mqnorm0m (class=math #args=1) Normal cumulative distribution function. - reduce + 1mreduce0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element, and return the accumulated element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is the first element for arrays, or the first element's key-value pair for maps. Examples: Array example: reduce([1,2,3,4,5], func(acc,e) {return acc + e**3}) returns 225. Map example: reduce({"a":1, "b":3, "c": 5}, func(acck,accv,ek,ev) {return {"sum_of_squares": accv + ev**2}}) returns {"sum_of_squares": 35}. - regextract + 1mregextract0m (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does. Examples: regextract("index ab09 file", "[a-z][a-z][0-9][0-9]") gives "ab09" regextract("index a999 file", "[a-z][a-z][0-9][0-9]") gives (absent), which will result in an assignment not happening. - regextract_or_else + 1mregextract_or_else0m (class=string #args=3) Like regextract but the third argument is the return value in case the input string (first argument) doesn't match the pattern (second argument). Examples: regextract_or_else("index ab09 file", "[a-z][a-z][0-9][0-9]", "nonesuch") gives "ab09" regextract_or_else("index a999 file", "[a-z][a-z][0-9][0-9]", "nonesuch") gives "nonesuch" - round + 1mround0m (class=math #args=1) Round to nearest integer. - roundm + 1mroundm0m (class=math #args=2) Round to nearest multiple of m: roundm($x,$m) is the same as round($x/$m)*$m. 
- rstrip + 1mrstrip0m (class=string #args=1) Strip trailing whitespace from string. - sec2dhms + 1msec2dhms0m (class=time #args=1) Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s" - sec2gmt + 1msec2gmt0m (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. Examples: sec2gmt(1234567890) = "2009-02-13T23:31:30Z" sec2gmt(1234567890.123456) = "2009-02-13T23:31:30Z" sec2gmt(1234567890.123456, 6) = "2009-02-13T23:31:30.123456Z" - sec2gmtdate + 1msec2gmtdate0m (class=time #args=1) Formats seconds since epoch (integer part) as GMT timestamp with year-month-date. Leaves non-numbers as-is. Example: sec2gmtdate(1440768801.7) = "2015-08-28". - sec2hms + 1msec2hms0m (class=time #args=1) Formats integer seconds as in sec2hms(5000) = "01:23:20" - sec2localdate + 1msec2localdate0m (class=time #args=1,2) Formats seconds since epoch (integer part) as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. Examples: sec2localdate(1440768801.7) = "2015-08-28" with TZ="Asia/Istanbul" sec2localdate(1440768801.7, "Asia/Istanbul") = "2015-08-28" - sec2localtime + 1msec2localtime0m (class=time #args=1,2,3) Formats seconds since epoch (integer part) as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. 
With second integer argument n, includes n decimal places for the seconds part Examples: sec2localtime(1234567890) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" @@ -2581,31 +2581,31 @@ FUNCTIONS FOR FILTER/PUT sec2localtime(1234567890.123456, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" sec2localtime(1234567890.123456, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" - select + 1mselect0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, includes each input element in the output if the function returns true. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: Array example: select([1,2,3,4,5], func(e) {return e >= 3}) returns [3, 4, 5]. Map example: select({"a":1, "b":3, "c":5}, func(k,v) {return v >= 3}) returns {"b":3, "c": 5}. - sgn + 1msgn0m (class=math #args=1) +1, 0, -1 for positive, zero, negative input respectively. - sha1 + 1msha10m (class=hashing #args=1) SHA1 hash. - sha256 + 1msha2560m (class=hashing #args=1) SHA256 hash. - sha512 + 1msha5120m (class=hashing #args=1) SHA512 hash. - sin + 1msin0m (class=math #args=1) Trigonometric sine. - sinh + 1msinh0m (class=math #args=1) Hyperbolic sine. - sort + 1msort0m (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. 
If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values. Examples: Default sorting: sort([3,"A",1,"B",22]) returns [1, 3, 22, "A", "B"]. @@ -2621,67 +2621,67 @@ FUNCTIONS FOR FILTER/PUT Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}. Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}. - splita + 1msplita0m (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on. Example: splita("3,4,5", ",") = [3,4,5] - splitax + 1msplitax0m (class=conversion #args=2) Splits string into array without type inference. First argument is string to split; second is the separator to split on. Example: splitax("3,4,5", ",") = ["3","4","5"] - splitkv + 1msplitkv0m (class=conversion #args=3) Splits string by separators into map with type inference. First argument is string to split; second argument is pair separator; third argument is field separator. Example: splitkv("a=3,b=4,c=5", "=", ",") = {"a":3,"b":4,"c":5} - splitkvx + 1msplitkvx0m (class=conversion #args=3) Splits string by separators into map without type inference (keys and values are strings). First argument is string to split; second argument is pair separator; third argument is field separator. Example: splitkvx("a=3,b=4,c=5", "=", ",") = {"a":"3","b":"4","c":"5"} - splitnv + 1msplitnv0m (class=conversion #args=2) Splits string by separator into integer-indexed map with type inference. First argument is string to split; second argument is separator to split on.
Example: splitnv("a,b,c", ",") = {"1":"a","2":"b","3":"c"} - splitnvx + 1msplitnvx0m (class=conversion #args=2) Splits string by separator into integer-indexed map without type inference (values are strings). First argument is string to split; second argument is separator to split on. Example: splitnvx("3,4,5", ",") = {"1":"3","2":"4","3":"5"} - sqrt + 1msqrt0m (class=math #args=1) Square root. - ssub + 1mssub0m (class=string #args=3) Like sub but does no regexing. No characters are special. Example: ssub("abc.def", ".", "X") gives "abcXdef" - strftime + 1mstrftime0m (class=time #args=2) Formats seconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. Examples: strftime(1440768801.7,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z" strftime(1440768801.7,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.700Z" - strftime_local + 1mstrftime_local0m (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. Examples: strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul" strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.700 +0300" with TZ="Asia/Istanbul" strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.700 +0300" - string + 1mstring0m (class=conversion #args=1) Convert int/float/bool/string/array/map to string. - strip + 1mstrip0m (class=string #args=1) Strip leading and trailing whitespace from string. - strlen + 1mstrlen0m (class=string #args=1) String length. 
- strptime + 1mstrptime0m (class=time #args=2) strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local. Examples: strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000 @@ -2689,7 +2689,7 @@ FUNCTIONS FOR FILTER/PUT strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400 strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 - strptime_local + 1mstrptime_local0m (class=time #args=2,3) Like strptime but consults the $TZ environment variable to get local time zone. Examples: strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul" @@ -2697,7 +2697,7 @@ FUNCTIONS FOR FILTER/PUT strptime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001 with TZ="Asia/Istanbul" strptime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001 - sub + 1msub0m (class=string #args=3) '$name = sub($name, "old", "new")': replace once (first match, if there are multiple matches), with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to sub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. Examples: sub("ababab", "ab", "XY") gives "XYabab" @@ -2706,229 +2706,229 @@ FUNCTIONS FOR FILTER/PUT sub("abcdefg", "[ce]", "X") gives "abXdefg" sub("prefix4529:suffix8567", "suffix([0-9]+)", "name\1") gives "prefix4529:name8567" - substr + 1msubstr0m (class=string #args=3) substr is an alias for substr0. See also substr1. Miller is generally 1-up with all array and string indices, but, this is a backward-compatibility issue with Miller 5 and below. Arrays are new in Miller 6; the substr function is older. - substr0 + 1msubstr00m (class=string #args=3) substr0(s,m,n) gives substring of s from 0-up position m to n inclusive. Negative indices -len ..
-1 alias to 0 .. len-1. See also substr and substr1. - substr1 + 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. - system + 1msystem0m (class=system #args=1) Run command string, yielding its stdout minus final carriage return. - systime + 1msystime0m (class=time #args=0) Returns the system time in floating-point seconds since the epoch. - systimeint + 1msystimeint0m (class=time #args=0) Returns the system time in integer seconds since the epoch. - tan + 1mtan0m (class=math #args=1) Trigonometric tangent. - tanh + 1mtanh0m (class=math #args=1) Hyperbolic tangent. - tolower + 1mtolower0m (class=string #args=1) Convert string to lowercase. - toupper + 1mtoupper0m (class=string #args=1) Convert string to uppercase. - truncate + 1mtruncate0m (class=string #args=2) Truncates string first argument to max length of int second argument. - typeof + 1mtypeof0m (class=typing #args=1) Convert argument to type of argument (e.g. "str"). For debug. - unflatten + 1munflatten0m (class=collections #args=2) Reverses flatten. Useful for nested JSON-like structures for non-JSON file formats like CSV. The first argument is a map, and the second argument is the flatten separator. See also arrayify. See "Flatten/unflatten: converting between JSON and tabular formats" at https://miller.readthedocs.io for more information. Example: unflatten({"a.b.c" : 4}, ".") is {"a": {"b": { "c": 4 }}}. - unformat + 1munformat0m (class=string #args=2) Using first argument as format string, unpacks second argument into an array of matches, with type-inference. On non-match, returns error -- use is_error() to check. Examples: unformat("{}:{}:{}", "1:2:3") gives [1, 2, 3]. unformat("{}h{}m{}s", "3h47m22s") gives [3, 47, 22]. is_error(unformat("{}h{}m{}s", "3:47:22")) gives true. - unformatx + 1munformatx0m (class=string #args=2) Same as unformat, but without type-inference.
Examples: unformatx("{}:{}:{}", "1:2:3") gives ["1", "2", "3"]. unformatx("{}h{}m{}s", "3h47m22s") gives ["3", "47", "22"]. is_error(unformatx("{}h{}m{}s", "3:47:22")) gives true. - uptime + 1muptime0m (class=time #args=0) Returns the time in floating-point seconds since the current Miller program was started. - urand + 1murand0m (class=math #args=0) Floating-point numbers uniformly distributed on the unit interval. Example: Int-valued example: '$n=floor(20+urand()*11)'. - urand32 + 1murand320m (class=math #args=0) Integer uniformly distributed between 0 and 2**32-1 inclusive. - urandelement + 1murandelement0m (class=math #args=1) Random sample from the first argument, which must be a non-empty array. - urandint + 1murandint0m (class=math #args=2) Integer uniformly distributed between inclusive integer endpoints. - urandrange + 1murandrange0m (class=math #args=2) Floating-point numbers uniformly distributed on the interval [a, b). - utf8_to_latin1 + 1mutf8_to_latin10m (class=string #args=1) Tries to convert UTF-8-encoded string to Latin-1-encoded string. If argument is array or map, recurses into it. Examples: $y = utf8_to_latin1($x) $* = utf8_to_latin1($*) - version + 1mversion0m (class=system #args=0) Returns the Miller version as a string. - ! + 1m!0m (class=boolean #args=1) Logical negation. - != + 1m!=0m (class=boolean #args=2) String/numeric inequality. Mixing number and string results in string compare. - !=~ + 1m!=~0m (class=boolean #args=2) String (left-hand side) does not match regex (right-hand side), e.g. '$name !=~ "^a.*b$"'. - % + 1m%0m (class=arithmetic #args=2) Remainder; never negative-valued (pythonic). - & + 1m&0m (class=arithmetic #args=2) Bitwise AND. - && + 1m&&0m (class=boolean #args=2) Logical AND. - * + 1m*0m (class=arithmetic #args=2) Multiplication, with integer*integer overflow to float. - ** + 1m**0m (class=arithmetic #args=2) Exponentiation. Same as pow, but as an infix operator.
- + + 1m+0m (class=arithmetic #args=1,2) Addition as binary operator; unary plus operator. - - + 1m-0m (class=arithmetic #args=1,2) Subtraction as binary operator; unary negation operator. - . + 1m.0m (class=string #args=2) String concatenation. Non-strings are coerced, so you can do '"ax".98' etc. - .* + 1m.*0m (class=arithmetic #args=2) Multiplication, with integer-to-integer overflow. - .+ + 1m.+0m (class=arithmetic #args=2) Addition, with integer-to-integer overflow. - .- + 1m.-0m (class=arithmetic #args=2) Subtraction, with integer-to-integer overflow. - ./ + 1m./0m (class=arithmetic #args=2) Integer division, rounding toward zero. - / + 1m/0m (class=arithmetic #args=2) Division. Integer / integer is integer when exact, else floating-point: e.g. 6/3 is 2 but 6/4 is 1.5. - // + 1m//0m (class=arithmetic #args=2) Pythonic integer division, rounding toward negative. - < + 1m<0m (class=boolean #args=2) String/numeric less-than. Mixing number and string results in string compare. - << + 1m<<0m (class=arithmetic #args=2) Bitwise left-shift. - <= + 1m<=0m (class=boolean #args=2) String/numeric less-than-or-equals. Mixing number and string results in string compare. - <=> + 1m<=>0m (class=boolean #args=2) Comparator, nominally for sorting. Given a <=> b, returns <0, 0, >0 as a < b, a == b, or a > b, respectively. - == + 1m==0m (class=boolean #args=2) String/numeric equality. Mixing number and string results in string compare. - =~ + 1m=~0m (class=boolean #args=2) String (left-hand side) matches regex (right-hand side), e.g. '$name =~ "^a.*b$"'. Capture groups \1 through \9 are matched from (...) in the right-hand side, and can be used within subsequent DSL statements. See also "Regular expressions" at https://miller.readthedocs.io. Examples: With if-statement: if ($url =~ "http.*com") { ... 
} Without if-statement: given $line = "index ab09 file", and $line =~ "([a-z][a-z])([0-9][0-9])", then $label = "[\1:\2]", $label is "[ab:09]" - > + 1m>0m (class=boolean #args=2) String/numeric greater-than. Mixing number and string results in string compare. - >= + 1m>=0m (class=boolean #args=2) String/numeric greater-than-or-equals. Mixing number and string results in string compare. - >> + 1m>>0m (class=arithmetic #args=2) Bitwise signed right-shift. - >>> + 1m>>>0m (class=arithmetic #args=2) Bitwise unsigned right-shift. - ?: + 1m?:0m (class=boolean #args=3) Standard ternary operator. - ?? + 1m??0m (class=boolean #args=2) Absent-coalesce operator. $a ?? 1 evaluates to 1 if $a isn't defined in the current record. - ??? + 1m???0m (class=boolean #args=2) Absent/empty-coalesce operator. $a ??? 1 evaluates to 1 if $a isn't defined in the current record, or has empty value. - ^ + 1m^0m (class=arithmetic #args=2) Bitwise XOR. - ^^ + 1m^^0m (class=boolean #args=2) Logical XOR. - | + 1m|0m (class=arithmetic #args=2) Bitwise OR. - || + 1m||0m (class=boolean #args=2) Logical OR. - ~ + 1m~0m (class=arithmetic #args=1) Bitwise NOT. Beware '$y=~$x' since =~ is the regex-match operator: try '$y = ~$x'. -KEYWORDS FOR PUT AND FILTER - all +1mKEYWORDS FOR PUT AND FILTER0m + 1mall0m all: used in "emit1", "emit", "emitp", and "unset" as a synonym for @* - begin + 1mbegin0m begin: defines a block of statements to be executed before input records are ingested. The body statements must be wrapped in curly braces. Example: 'begin { @count = 0 }' - bool + 1mbool0m bool: declares a boolean local variable in the current curly-braced scope. Type-checking happens at assignment: 'bool b = 1' is an error. - break + 1mbreak0m break: causes execution to continue after the body of the current for/while/do-while loop. - call + 1mcall0m call: used for invoking a user-defined subroutine. Example: 'subr s(k,v) { print k . " is " . 
v} call s("a", $a)' - continue + 1mcontinue0m continue: causes execution to skip the remaining statements in the body of the current for/while/do-while loop. For-loop increments are still applied. - do + 1mdo0m do: with "while", introduces a do-while loop. The body statements must be wrapped in curly braces. - dump + 1mdump0m dump: prints all currently defined out-of-stream variables immediately to stdout as JSON. @@ -2947,21 +2947,21 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump >> "mytap.dat"}' Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump | "jq .[]"}' - edump + 1medump0m edump: prints all currently defined out-of-stream variables immediately to stderr as JSON. Example: mlr --from f.dat put -q '@v[NR]=$*; end { edump }' - elif + 1melif0m elif: the way Miller spells "else if". The body statements must be wrapped in curly braces. - else + 1melse0m else: terminates an if/elif/elif chain. The body statements must be wrapped in curly braces. - emit1 + 1memit10m emit1: inserts an out-of-stream variable into the output record stream. Unlike the other map variants, side-by-sides, indexing, and redirection are not supported, but you can emit any map-valued expression. @@ -2971,7 +2971,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io://johnkerl.org/miller/doc for more information. - emit + 1memit0m emit: inserts an out-of-stream variable into the output record stream. Hashmap indices present in the data but not slotted by emit arguments are not output. @@ -3002,7 +3002,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io://johnkerl.org/miller/doc for more information. - emitf + 1memitf0m emitf: inserts non-indexed out-of-stream variable(s) side-by-side into the output record stream. @@ -3030,7 +3030,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io://johnkerl.org/miller/doc for more information. 
- emitp + 1memitp0m emitp: inserts an out-of-stream variable into the output record stream. Hashmap indices present in the data but not slotted by emitp arguments are output concatenated with ":". @@ -3060,29 +3060,29 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io://johnkerl.org/miller/doc for more information. - end + 1mend0m end: defines a block of statements to be executed after input records are ingested. The body statements must be wrapped in curly braces. Example: 'end { emit @count }' Example: 'end { eprint "Final count is " . @count }' - eprint + 1meprint0m eprint: prints expression immediately to stderr. Example: mlr --from f.dat put -q 'eprint "The sum of x and y is ".($x+$y)' Example: mlr --from f.dat put -q 'for (k, v in $*) { eprint k . " => " . v }' Example: mlr --from f.dat put '(NR % 1000 == 0) { eprint "Checkpoint ".NR}' - eprintn + 1meprintn0m eprintn: prints expression immediately to stderr, without trailing newline. Example: mlr --from f.dat put -q 'eprintn "The sum of x and y is ".($x+$y); eprint ""' - false + 1mfalse0m false: the boolean literal value. - filter + 1mfilter0m filter: includes/excludes the record in the output record stream. Example: mlr --from f.dat put 'filter (NR == 2 || $x > 5.4)' @@ -3093,11 +3093,11 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q '@running_sum += $x * $y; emit @running_sum' - float + 1mfloat0m float: declares a floating-point local variable in the current curly-braced scope. Type-checking happens at assignment: 'float x = 0' is an error. - for + 1mfor0m for: defines a for-loop using one of three styles. The body statements must be wrapped in curly braces. For-loop over stream record: @@ -3114,71 +3114,71 @@ KEYWORDS FOR PUT AND FILTER Example: 'for (var i = 0, var b = 1; i < 10; i += 1, b *= 2) { ... }' - func + 1mfunc0m func: used for defining a user-defined function. 
Example: 'func f(a,b) { return sqrt(a**2+b**2)} $d = f($x, $y)' - funct + 1mfunct0m funct: used for saying that a function argument is a user-defined function. Example: 'func g(num a, num b, funct f) :num { return f(a**2+b**2) }' - if + 1mif0m if: starts an if/elif/elif chain. The body statements must be wrapped in curly braces. - in + 1min0m in: used in for-loops over stream records or out-of-stream variables. - int + 1mint0m int: declares an integer local variable in the current curly-braced scope. Type-checking happens at assignment: 'int x = 0.0' is an error. - map + 1mmap0m map: declares a map-valued local variable in the current curly-braced scope. Type-checking happens at assignment: 'map b = 0' is an error. map b = {} is always OK. map b = a is OK or not depending on whether a is a map. - num + 1mnum0m num: declares an int/float local variable in the current curly-braced scope. Type-checking happens at assignment: 'num b = true' is an error. - print + 1mprint0m print: prints expression immediately to stdout. Example: mlr --from f.dat put -q 'print "The sum of x and y is ".($x+$y)' Example: mlr --from f.dat put -q 'for (k, v in $*) { print k . " => " . v }' Example: mlr --from f.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}' - printn + 1mprintn0m printn: prints expression immediately to stdout, without trailing newline. Example: mlr --from f.dat put -q 'printn "."; end { print "" }' - return + 1mreturn0m return: specifies the return value from a user-defined function. Omitted return statements (including via if-branches) result in an absent-null return value, which in turn results in a skipped assignment to an LHS. - stderr + 1mstderr0m stderr: Used for tee, emit, emitf, emitp, print, and dump in place of filename to print to standard error. - stdout + 1mstdout0m stdout: Used for tee, emit, emitf, emitp, print, and dump in place of filename to print to standard output.
- str + 1mstr0m str: declares a string local variable in the current curly-braced scope. Type-checking happens at assignment. - subr + 1msubr0m subr: used for defining a subroutine. Example: 'subr s(k,v) { print k . " is " . v} call s("a", $a)' - tee + 1mtee0m tee: prints the current record to specified file. This is an immediate print to the specified file (except for pprint format which of course waits until the end of the input stream to format all output). @@ -3204,10 +3204,10 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*' Example: mlr --from f.dat put -q --ojson 'tee | "gzip > /tmp/data-".$a.".gz", $*' - true + 1mtrue0m true: the boolean literal value. - unset + 1munset0m unset: clears field(s) from the current record, or an out-of-stream or local variable. Example: mlr --from f.dat put 'unset $x' @@ -3217,76 +3217,76 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put '...; unset @sums["green"]' Example: mlr --from f.dat put '...; unset @*' - var + 1mvar0m var: declares an untyped local variable in the current curly-braced scope. Examples: 'var a=1', 'var xyz=""' - while + 1mwhile0m while: introduces a while loop, or with "do", introduces a do-while loop. The body statements must be wrapped in curly braces. - ENV + 1mENV0m ENV: access to environment variables by name, e.g. '$home = ENV["HOME"]' - FILENAME + 1mFILENAME0m FILENAME: evaluates to the name of the current file being processed. - FILENUM + 1mFILENUM0m FILENUM: evaluates to the number of the current file being processed, starting with 1. - FNR + 1mFNR0m FNR: evaluates to the number of the current record within the current file being processed, starting with 1. Resets at the start of each file. - IFS + 1mIFS0m IFS: evaluates to the input field separator from the command line. - IPS + 1mIPS0m IPS: evaluates to the input pair separator from the command line. 
- IRS + 1mIRS0m IRS: evaluates to the input record separator from the command line, or to LF or CRLF from the input data if in autodetect mode (which is the default). - M_E + 1mM_E0m M_E: the mathematical constant e. - M_PI + 1mM_PI0m M_PI: the mathematical constant pi. - NF + 1mNF0m NF: evaluates to the number of fields in the current record. - NR + 1mNR0m NR: evaluates to the number of the current record over all files being processed, starting with 1. Does not reset at the start of each file. - OFS + 1mOFS0m OFS: evaluates to the output field separator from the command line. - OPS + 1mOPS0m OPS: evaluates to the output pair separator from the command line. - ORS + 1mORS0m ORS: evaluates to the output record separator from the command line, or to LF or CRLF from the input data if in autodetect mode (which is the default). -AUTHOR +1mAUTHOR0m Miller is written by John Kerl <kerl.john.r@gmail.com>. This manual page has been composed from Miller's help output by Eric MSP Veith <eveith@veith-m.de>. -SEE ALSO +1mSEE ALSO0m awk(1), sed(1), cut(1), join(1), sort(1), RFC 4180: Common Format and MIME Type for Comma-Separated Values (CSV) Files, the Miller docsite https://miller.readthedocs.io - 2022-09-05 MILLER(1) + 2022-11-27 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index df9bf4e322..1526e60544 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2,11 +2,11 @@ MILLER(1) MILLER(1) -NAME +1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such as CSV and tabular JSON. -SYNOPSIS +1mSYNOPSIS0m Usage: mlr [flags] {verb} [verb-dependent options ...] {zero or more file names} @@ -22,16 +22,16 @@ SYNOPSIS https://miller.readthedocs.io -DESCRIPTION +1mDESCRIPTION0m Miller operates on key-value-pair data while the familiar Unix tools operate on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. 
This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.4.0. + manpage documents mlr 6.5.0. -EXAMPLES +1mEXAMPLES0m mlr --icsv --opprint cat example.csv mlr --icsv --opprint sort -f shape example.csv mlr --icsv --opprint sort -f shape -nr index example.csv @@ -40,7 +40,7 @@ EXAMPLES mlr --icsv --ojson put '$ratio = $quantity / $rate' example.csv mlr --icsv --opprint --from example.csv sort -nr index then cut -f shape,quantity -FILE FORMATS +1mFILE FORMATS0m CSV/CSV-lite: comma-separated values with separate header line TSV: same but with tabs in places of commas +---------------------+ @@ -112,7 +112,7 @@ FILE FORMATS | fox jumped | Record 2: "1":"fox", "2":"jumped" +---------------------+ -HELP OPTIONS +1mHELP OPTIONS0m Type 'mlr help {topic}' for any of the following: Essentials: mlr help topics @@ -168,7 +168,7 @@ HELP OPTIONS Use 'mlr help find ...' for approximate (substring) matches, e.g. 'mlr help find map' for all things with "map" in their names. -VERB LIST +1mVERB LIST0m altkv bar bootstrap cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values fraction gap grep group-by group-like having-fields head histogram json-parse @@ -178,7 +178,7 @@ VERB LIST sort sort-within-records split stats1 stats2 step summary tac tail tee template top utf8-to-latin1 unflatten uniq unsparsify -FUNCTION LIST +1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent asserting_array asserting_bool asserting_boolean asserting_empty asserting_empty_map asserting_error asserting_float asserting_int @@ -204,7 +204,7 @@ FUNCTION LIST version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ -COMMENTS-IN-DATA FLAGS +1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as # This is a comment for a CSV file @@ -233,7 +233,7 @@ COMMENTS-IN-DATA FLAGS Ignore commented lines within input, with specified prefix. -COMPRESSED-DATA FLAGS +1mCOMPRESSED-DATA FLAGS0m Miller offers a few different ways to handle reading data files which have been compressed. @@ -286,7 +286,7 @@ COMPRESSED-DATA FLAGS --zin Uncompress zlib within the Miller process. Done by default if file ends in `.z`. -CSV/TSV-ONLY FLAGS +1mCSV/TSV-ONLY FLAGS0m These are flags which are applicable to CSV format. --allow-ragged-csv-input or --ragged or --allow-ragged-tsv-input @@ -317,7 +317,7 @@ CSV/TSV-ONLY FLAGS -N Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. -FILE-FORMAT FLAGS +1mFILE-FORMAT FLAGS0m See the File formats doc page, and or `mlr help file-formats`, for more about file formats Miller supports. @@ -381,7 +381,7 @@ FILE-FORMAT FLAGS -o {format name} Use format name for output data. For example: `-o csv` is the same as `--ocsv`. -FLATTEN-UNFLATTEN FLAGS +1mFLATTEN-UNFLATTEN FLAGS0m These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). See the Flatten/unflatten doc page for more information. @@ -402,7 +402,7 @@ FLATTEN-UNFLATTEN FLAGS `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. -FORMAT-CONVERSION KEYSTROKE-SAVER FLAGS +1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m As keystroke-savers for format-conversion you may use the following. The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX, JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown @@ -422,7 +422,7 @@ FORMAT-CONVERSION KEYSTROKE-SAVER FLAGS -p Keystroke-saver for `--nidx --fs space --repifs`. -T Keystroke-saver for `--nidx --fs tab`. 
-JSON-ONLY FLAGS +1mJSON-ONLY FLAGS0m These are flags which are applicable to JSON output format. --jlistwrap or --jl Wrap JSON output in outermost `[ ]`. This is the @@ -437,7 +437,7 @@ JSON-ONLY FLAGS --no-jvstack Put objects/arrays all on one line for JSON output. This is the default for JSON Lines output format. -LEGACY FLAGS +1mLEGACY FLAGS0m These are flags which don't do anything in the current Miller version. They are accepted as no-op flags in order to keep old scripts from breaking. @@ -468,7 +468,7 @@ LEGACY FLAGS --vflatsep Ignored as of version 6. This functionality is subsumed into JSON formatting. -MISCELLANEOUS FLAGS +1mMISCELLANEOUS FLAGS0m These are flags which don't fit into any other category. --fflush Force buffered output to be written after every output record. The default is flush output after @@ -559,7 +559,7 @@ MISCELLANEOUS FLAGS information please see https://miller.readthedocs.io/en/latest/scripting/. -OUTPUT-COLORIZATION FLAGS +1mOUTPUT-COLORIZATION FLAGS0m Miller uses colors to highlight outputs. You can specify color preferences. Note: output colorization does not work on Windows. @@ -640,14 +640,14 @@ OUTPUT-COLORIZATION FLAGS --value-color Specify the color (see `--list-color-codes` and `--list-color-names`) for record values. -PPRINT-ONLY FLAGS +1mPPRINT-ONLY FLAGS0m These are flags which are applicable to PPRINT format. --barred Prints a border around PPRINT output (not available for input). --right Right-justifies all fields for PPRINT output. -PROFILING FLAGS +1mPROFILING FLAGS0m These are flags for profiling Miller performance. --cpuprofile {CPU-profile file name} Create a CPU-profile file for performance analysis. @@ -661,7 +661,7 @@ PROFILING FLAGS must be the very first thing after 'mlr' on the command line. -SEPARATOR FLAGS +1mSEPARATOR FLAGS0m See the Separators doc page for more about record separators, field separators, and pair separators. 
Also see the File formats doc page, or `mlr help file-formats`, for more about the file formats Miller supports. @@ -770,7 +770,7 @@ SEPARATOR FLAGS spaces. --rs {string} Specify RS for input and output. -AUXILIARY COMMANDS +1mAUXILIARY COMMANDS0m Available subcommands: aux-list hex @@ -783,7 +783,7 @@ AUXILIARY COMMANDS version For more information, please invoke mlr {subcommand} --help. -MLRRC +1mMLRRC0m You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc. For example, if you usually process CSV, then you can put "--csv" in your .mlrrc file and that will be the default input/output format unless otherwise specified on the command line. @@ -817,7 +817,7 @@ MLRRC See also: https://miller.readthedocs.io/en/latest/customization.html -REPL +1mREPL0m Usage: mlr repl [options] {zero or more data-file names} -v Prints the expressions's AST (abstract syntax tree), which gives full transparency on the precedence and associativity rules of @@ -847,14 +847,14 @@ REPL Any data-file names are opened just as if you had waited and typed :open {filenames} at the Miller REPL prompt. -VERBS - altkv +1mVERBS0m + 1maltkv0m Usage: mlr altkv [options] Given fields with values of the form a,b,c,d,e,f emits a=b,c=d,e=f pairs. Options: -h|--help Show this message. - bar + 1mbar0m Usage: mlr bar [options] Replaces a numeric field with a number of asterisks, allowing for cheesy bar plots. These align best with --opprint or --oxtab output format. @@ -872,7 +872,7 @@ VERBS However you can make them all longer if you so desire. -h|--help Show this message. - bootstrap + 1mbootstrap0m Usage: mlr bootstrap [options] Emits an n-sample, with replacement, of the input records. See also mlr sample and mlr shuffle. @@ -881,7 +881,7 @@ VERBS Must be non-negative. -h|--help Show this message. - cat + 1mcat0m Usage: mlr cat [options] Passes input records directly to output. Most useful for format conversion. 
Options: @@ -892,14 +892,14 @@ VERBS --filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. - check + 1mcheck0m Usage: mlr check [options] Consumes records without printing any output. Useful for doing a well-formatted check on input data. Options: -h|--help Show this message. - clean-whitespace + 1mclean-whitespace0m Usage: mlr clean-whitespace [options] For each record, for each field in the record, whitespace-cleans the keys and/or values. Whitespace-cleaning entails stripping leading and trailing whitespace, @@ -914,7 +914,7 @@ VERBS leave off -k as well as -v. -h|--help Show this message. - count-distinct + 1mcount-distinct0m Usage: mlr count-distinct [options] Prints number of records having distinct values for specified field names. Same as uniq -c. @@ -930,7 +930,7 @@ VERBS for distinct a field values and counts for distinct b field values separately. - count + 1mcount0m Usage: mlr count [options] Prints number of records, optionally grouped by distinct values for specified field names. Options: @@ -939,7 +939,7 @@ VERBS -o {name} Field name for output-count. Default "count". -h|--help Show this message. - count-similar + 1mcount-similar0m Usage: mlr count-similar [options] Ingests all records, then emits each record augmented by a count of the number of other records having the same group-by field values. @@ -948,7 +948,7 @@ VERBS -o {name} Field name for output-counts. Defaults to "count". -h|--help Show this message. - cut + 1mcut0m Usage: mlr cut [options] Passes through input records with specified fields included/excluded. Options: @@ -968,7 +968,7 @@ VERBS mlr cut -r -f '^status$,"sda[0-9]"' mlr cut -r -f '^status$,"sda[0-9]"i' (this is case-insensitive) - decimate + 1mdecimate0m Usage: mlr decimate [options] Passes through one of every n records, optionally by category. Options: @@ -978,7 +978,7 @@ VERBS -n {n} Decimation factor (default 10). -h|--help Show this message. 
- fill-down + 1mfill-down0m Usage: mlr fill-down [options] If a given record has a missing value for a given field, fill that from the corresponding value from a previous record, if any. @@ -994,14 +994,14 @@ VERBS -f Field names for fill-down. -h|--help Show this message. - fill-empty + 1mfill-empty0m Usage: mlr fill-empty [options] Fills empty-string fields with specified fill-value. Options: -v {string} Fill-value: defaults to "N/A" -S Don't infer type -- so '-v 0' would fill string 0 not int 0. - filter + 1mfilter0m Usage: mlr filter [options] {DSL expression} Options: -f {file name} File containing a DSL expression (see examples below). If the filename @@ -1086,7 +1086,7 @@ VERBS See also https://miller.readthedocs.io/reference-dsl for more context. - flatten + 1mflatten0m Usage: mlr flatten [options] Flattens multi-level maps to single-level ones. Example: field with name 'a' and value '{"b": { "c": 4 }}' becomes name 'a.b.c' and value 4. @@ -1095,7 +1095,7 @@ VERBS -s Separator, defaulting to mlr --flatsep value. -h|--help Show this message. - format-values + 1mformat-values0m Usage: mlr format-values [options] Applies format strings to all field values, depending on autodetected type. * If a field value is detected to be integer, applies integer format. @@ -1126,7 +1126,7 @@ VERBS -n Coerce field values autodetected as int to float, and then apply the float format. - fraction + 1mfraction0m Usage: mlr fraction [options] For each record's value in specified fields, computes the ratio of that value to the sum of values in that field over all input records. @@ -1148,7 +1148,7 @@ VERBS x=1,x_cumulative_fraction=0.1 x=2,x_cumulative_fraction=0.3 x=3,x_cumulative_fraction=0.6 and x=4,x_cumulative_fraction=1.0 - gap + 1mgap0m Usage: mlr gap [options] Emits an empty record every n records, or when certain values change. Options: @@ -1159,7 +1159,7 @@ VERBS -n is ignored if -g is present. -h|--help Show this message. 
 - grep + 1mgrep0m Usage: mlr grep [options] {regular expression} Passes through records which match the regular expression. Options: @@ -1178,18 +1178,18 @@ VERBS features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." - group-by + 1mgroup-by0m Usage: mlr group-by [options] {comma-separated field names} Outputs records in batches having identical values at specified field names. Options: -h|--help Show this message. - group-like + 1mgroup-like0m Usage: mlr group-like [options] Outputs records in batches having identical field names. Options: -h|--help Show this message. - having-fields + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. Options: @@ -1205,7 +1205,7 @@ VERBS mlr having-fields --any-matching '"sda[0-9]"' mlr having-fields --any-matching '"sda[0-9]"i' (this is case-insensitive) - head + 1mhead0m Usage: mlr head [options] Passes through the first n records, optionally by category. Without -g, ceases consuming more input (i.e. is fast) when n records have been read. @@ -1214,7 +1214,7 @@ VERBS -n {n} Head-count to print. Default 10. -h|--help Show this message. - histogram + 1mhistogram0m Just a histogram. Input values < lo or > hi are not counted. Usage: mlr histogram [options] -f {a,b,c} Value-field names for histogram counts @@ -1226,14 +1226,14 @@ VERBS -o {prefix} Prefix for output field name. Default: no prefix. -h|--help Show this message. - json-parse + 1mjson-parse0m Usage: mlr json-parse [options] Tries to convert string field values to parsed JSON, e.g. "[1,2,3]" -> [1,2,3]. Options: -f {...} Comma-separated list of field names to json-parse (default all). -h|--help Show this message. - json-stringify + 1mjson-stringify0m Usage: mlr json-stringify [options] Produces string field values from field-value data, e.g. [1,2,3] -> "[1,2,3]". Options: @@ -1242,7 +1242,7 @@ VERBS --no-jvstack Produce single-line JSON output per record (default). 
-h|--help Show this message. - join + 1mjoin0m Usage: mlr join [options] Joins records from specified left file name with records from all file names at the end of the Miller argument list. @@ -1295,7 +1295,7 @@ VERBS Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information including examples. - label + 1mlabel0m Usage: mlr label [options] {new1,new2,new3,...} Given n comma-separated names, renames the first n fields of each record to have the respective name. (Fields past the nth are left with their original @@ -1305,14 +1305,14 @@ VERBS Options: -h|--help Show this message. - latin1-to-utf8 + 1mlatin1-to-utf80m Usage: mlr latin1-to-utf8, with no options. Recursively converts record strings from Latin-1 to UTF-8. For field-level control, please see the latin1_to_utf8 DSL function. Options: -h|--help Show this message. - least-frequent + 1mleast-frequent0m Usage: mlr least-frequent [options] Shows the least frequently occurring distinct values for specified field names. The first entry is the statistical anti-mode; the remaining are runners-up. @@ -1323,7 +1323,7 @@ VERBS -o {name} Field name for output count. Default "count". See also "mlr most-frequent". - merge-fields + 1mmerge-fields0m Usage: mlr merge-fields [options] Computes univariate statistics for each input record, accumulated across specified fields. @@ -1372,7 +1372,7 @@ VERBS since "a_in_x" and "a_out_x" both collapse to "a_x", "b_in_y" collapses to "b_y", and "b_out_x" collapses to "b_x". - most-frequent + 1mmost-frequent0m Usage: mlr most-frequent [options] Shows the most frequently occurring distinct values for specified field names. The first entry is the statistical mode; the remaining are runners-up. @@ -1383,7 +1383,7 @@ VERBS -o {name} Field name for output count. Default "count". See also "mlr least-frequent". - nest + 1mnest0m Usage: mlr nest [options] Explodes specified field values into separate fields/records, or reverses this. 
Options: @@ -1432,14 +1432,14 @@ VERBS e.g. by default the former is semicolon and the latter is comma. See also mlr reshape. - nothing + 1mnothing0m Usage: mlr nothing [options] Drops all input records. Useful for testing, or after tee/print/etc. have produced other output. Options: -h|--help Show this message. - put + 1mput0m Usage: mlr put [options] {DSL expression} Options: -f {file name} File containing a DSL expression (see examples below). If the filename @@ -1519,19 +1519,19 @@ VERBS See also https://miller.readthedocs.io/reference-dsl for more context. - regularize + 1mregularize0m Usage: mlr regularize [options] Outputs records sorted lexically ascending by keys. Options: -h|--help Show this message. - remove-empty-columns + 1mremove-empty-columns0m Usage: mlr remove-empty-columns [options] Omits fields which are empty on every input row. Non-streaming. Options: -h|--help Show this message. - rename + 1mrename0m Usage: mlr rename [options] {old1,new1,old2,new2,...} Renames specified fields. Options: @@ -1554,7 +1554,7 @@ VERBS mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. to "Name" - reorder + 1mreorder0m Usage: mlr reorder [options] Moves specified names to start of record, or end of record. Options: @@ -1572,7 +1572,7 @@ VERBS mlr reorder -f a,b sends input record "d=4,b=2,a=1,c=3" to "a=1,b=2,d=4,c=3". mlr reorder -e -f a,b sends input record "d=4,b=2,a=1,c=3" to "d=4,c=3,a=1,b=2". - repeat + 1mrepeat0m Usage: mlr repeat [options] Copies input records to output records multiple times. Options must be exactly one of the following: @@ -1599,7 +1599,7 @@ VERBS a=1,b=2,c=3 a=1,b=2,c=3 - reshape + 1mreshape0m Usage: mlr reshape [options] Wide-to-long options: -i {input field names} -o {key-field name,value-field name} @@ -1658,7 +1658,7 @@ VERBS 2009-01-03 0.98012375 1.3179287 See also mlr nest. 
- sample + 1msample0m Usage: mlr sample [options] Reservoir sampling (subsampling without replacement), optionally by category. See also mlr bootstrap and mlr shuffle. @@ -1667,7 +1667,7 @@ VERBS -k {k} Required: number of records to output in total, or by group if using -g. -h|--help Show this message. - sec2gmtdate + 1msec2gmtdate0m Usage: ../c/mlr sec2gmtdate {comma-separated list of field names} Replaces a numeric field representing seconds since the epoch with the corresponding GMT year-month-day timestamp; leaves non-numbers as-is. @@ -1676,7 +1676,7 @@ VERBS is the same as ../c/mlr put '$time1=sec2gmtdate($time1);$time2=sec2gmtdate($time2)' - sec2gmt + 1msec2gmt0m Usage: mlr sec2gmt [options] {comma-separated list of field names} Replaces a numeric field representing seconds since the epoch with the corresponding GMT timestamp; leaves non-numbers as-is. This is nothing @@ -1691,7 +1691,7 @@ VERBS --nanos Input numbers are treated as nanoseconds since the epoch. -h|--help Show this message. - seqgen + 1mseqgen0m Usage: mlr seqgen [options] Passes input records directly to output. Most useful for format conversion. Produces a sequence of counters. Discards the input record stream. Produces @@ -1707,21 +1707,21 @@ VERBS stop, and step are all integers. Step may be negative. It may not be zero unless start == stop. - shuffle + 1mshuffle0m Usage: mlr shuffle [options] Outputs records randomly permuted. No output records are produced until all input records are read. See also mlr bootstrap and mlr sample. Options: -h|--help Show this message. - skip-trivial-records + 1mskip-trivial-records0m Usage: mlr skip-trivial-records [options] Passes through all records except those with zero fields, or those for which all fields have empty value. Options: -h|--help Show this message. - sort + 1msort0m Usage: mlr sort {flags} Sorts records primarily by the first specified field, secondarily by the second field, and so on. 
(Any records not having all specified sort keys will appear @@ -1746,14 +1746,14 @@ VERBS which is the same as: mlr sort -f a -f b -nr x -nr y -nr z - sort-within-records + 1msort-within-records0m Usage: mlr sort-within-records [options] Outputs records sorted lexically ascending by keys. Options: -r Recursively sort subobjects/submaps, e.g. for JSON input. -h|--help Show this message. - split + 1msplit0m Usage: mlr split [options] {filename} Options: -n {n}: Cap file sizes at N records. @@ -1793,7 +1793,7 @@ VERBS See also the "tee" DSL function which lets you do more ad-hoc customization. - stats1 + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across the input record stream. @@ -1856,7 +1856,7 @@ VERBS In particular, 1 and 1.0 are distinct text for count and mode. * When there are mode ties, the first-encountered datum wins. - stats2 + 1mstats20m Usage: mlr stats2 [options] Computes bivariate statistics for one or more given field-name pairs, accumulated across the input record stream. @@ -1884,7 +1884,7 @@ VERBS Example: mlr stats2 -a linreg-ols,r2 -f x,y -g size,shape Example: mlr stats2 -a corr -f x,y - step + 1mstep0m Usage: mlr step [options] Computes values dependent on earlier/later records, optionally grouped by category. Options: @@ -1927,7 +1927,7 @@ VERBS https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. - summary + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. @@ -1956,7 +1956,7 @@ VERBS uof upper outer fence: p75 + 3.0 * iqr Default summarizers: - field_type count mean min median max null_count distinct_count + field_type count mean min max null_count distinct_count Notes: * min, p25, median, p75, and max work for strings as well as numbers @@ -1969,13 +1969,13 @@ VERBS --all Use all available summarizers. -h|--help Show this message. 
 - tac + 1mtac0m Usage: mlr tac [options] Prints records in reverse order from the order in which they were encountered. Options: -h|--help Show this message. - tail + 1mtail0m Usage: mlr tail [options] Passes through the last n records, optionally by category. Options: @@ -1983,7 +1983,7 @@ VERBS -n {n} Tail-count to print. Default 10. -h|--help Show this message. - tee + 1mtee0m Usage: mlr tee [options] {filename} Options: -a Append to existing file, if any, rather than overwriting. @@ -1995,7 +1995,7 @@ VERBS -h|--help Show this message. - template + 1mtemplate0m Usage: mlr template [options] Places input-record fields in the order specified by list of column names. If the input record is missing a specified field, it will be filled with the fill-with. @@ -2010,7 +2010,7 @@ VERBS * Input record is c=3,a=1,f=6. * Output record is a=1,b=,c=3. - top + 1mtop0m Usage: mlr top [options] -f {a,b,c} Value-field names for top counts. -g {d,e,f} Optional group-by-field names for top counts. @@ -2028,14 +2028,14 @@ VERBS from -f, fields from -g, and the top-index field are emitted. For more information please see https://miller.readthedocs.io/en/latest/reference-verbs#top - utf8-to-latin1 + 1mutf8-to-latin10m Usage: mlr utf8-to-latin1, with no options. Recursively converts record strings from UTF-8 to Latin-1. For field-level control, please see the utf8_to_latin1 DSL function. Options: -h|--help Show this message. - unflatten + 1munflatten0m Usage: mlr unflatten [options] Reverses flatten. Example: field with name 'a.b.c' and value 4 becomes name 'a' and value '{"b": { "c": 4 }}'. @@ -2044,7 +2044,7 @@ VERBS -s {string} Separator, defaulting to mlr --flatsep value. -h|--help Show this message. - uniq + 1muniq0m Usage: mlr uniq [options] Prints distinct values for specified field names. With -c, same as count-distinct. For uniq, -f is a synonym for -g. @@ -2059,7 +2059,7 @@ VERBS With -n, produces only one record which is the unique-record count. 
With neither -c nor -n, produces unique records. - unsparsify + 1munsparsify0m Usage: mlr unsparsify [options] Prints records with the union of field names over all input records. For field names absent in a given record but present in others, fills in @@ -2074,239 +2074,239 @@ VERBS being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and 'a=,b=3,c=4'. -FUNCTIONS FOR FILTER/PUT - abs +1mFUNCTIONS FOR FILTER/PUT0m + 1mabs0m (class=math #args=1) Absolute value. - acos + 1macos0m (class=math #args=1) Inverse trigonometric cosine. - acosh + 1macosh0m (class=math #args=1) Inverse hyperbolic cosine. - any + 1many0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: Array example: any([10,20,30], func(e) {return $index == e}) Map example: any({"a": "foo", "b": "bar"}, func(k,v) {return $[k] == v}) - append + 1mappend0m (class=collections #args=2) Appends second argument to end of first argument, which must be an array. - apply + 1mapply0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, applies the function to each element of the array/map. For arrays, the function should take one argument, for array element; it should return a new element. For maps, it should take two arguments, for map-element key and value; it should return a new key-value pair (i.e. a single-entry map). Examples: Array example: apply([1,2,3,4,5], func(e) {return e ** 3}) returns [1, 8, 27, 64, 125]. 
Map example: apply({"a":1, "b":3, "c":5}, func(k,v) {return {toupper(k): v ** 2}}) returns {"A": 1, "B":9, "C": 25}. - arrayify + 1marrayify0m (class=collections #args=1) Walks through a nested map/array, converting any map with consecutive keys "1", "2", ... into an array. Useful to wrap the output of unflatten. - asin + 1masin0m (class=math #args=1) Inverse trigonometric sine. - asinh + 1masinh0m (class=math #args=1) Inverse hyperbolic sine. - asserting_absent + 1masserting_absent0m (class=typing #args=1) Aborts with an error if is_absent on the argument returns false, else returns its argument. - asserting_array + 1masserting_array0m (class=typing #args=1) Aborts with an error if is_array on the argument returns false, else returns its argument. - asserting_bool + 1masserting_bool0m (class=typing #args=1) Aborts with an error if is_bool on the argument returns false, else returns its argument. - asserting_boolean + 1masserting_boolean0m (class=typing #args=1) Aborts with an error if is_boolean on the argument returns false, else returns its argument. - asserting_empty + 1masserting_empty0m (class=typing #args=1) Aborts with an error if is_empty on the argument returns false, else returns its argument. - asserting_empty_map + 1masserting_empty_map0m (class=typing #args=1) Aborts with an error if is_empty_map on the argument returns false, else returns its argument. - asserting_error + 1masserting_error0m (class=typing #args=1) Aborts with an error if is_error on the argument returns false, else returns its argument. - asserting_float + 1masserting_float0m (class=typing #args=1) Aborts with an error if is_float on the argument returns false, else returns its argument. - asserting_int + 1masserting_int0m (class=typing #args=1) Aborts with an error if is_int on the argument returns false, else returns its argument. - asserting_map + 1masserting_map0m (class=typing #args=1) Aborts with an error if is_map on the argument returns false, else returns its argument. 
- asserting_nonempty_map + 1masserting_nonempty_map0m (class=typing #args=1) Aborts with an error if is_nonempty_map on the argument returns false, else returns its argument. - asserting_not_array + 1masserting_not_array0m (class=typing #args=1) Aborts with an error if is_not_array on the argument returns false, else returns its argument. - asserting_not_empty + 1masserting_not_empty0m (class=typing #args=1) Aborts with an error if is_not_empty on the argument returns false, else returns its argument. - asserting_not_map + 1masserting_not_map0m (class=typing #args=1) Aborts with an error if is_not_map on the argument returns false, else returns its argument. - asserting_not_null + 1masserting_not_null0m (class=typing #args=1) Aborts with an error if is_not_null on the argument returns false, else returns its argument. - asserting_null + 1masserting_null0m (class=typing #args=1) Aborts with an error if is_null on the argument returns false, else returns its argument. - asserting_numeric + 1masserting_numeric0m (class=typing #args=1) Aborts with an error if is_numeric on the argument returns false, else returns its argument. - asserting_present + 1masserting_present0m (class=typing #args=1) Aborts with an error if is_present on the argument returns false, else returns its argument. - asserting_string + 1masserting_string0m (class=typing #args=1) Aborts with an error if is_string on the argument returns false, else returns its argument. - atan + 1matan0m (class=math #args=1) One-argument arctangent. - atan2 + 1matan20m (class=math #args=2) Two-argument arctangent. - atanh + 1matanh0m (class=math #args=1) Inverse hyperbolic tangent. - bitcount + 1mbitcount0m (class=arithmetic #args=1) Count of 1-bits. - boolean + 1mboolean0m (class=conversion #args=1) Convert int/float/bool/string to boolean. - capitalize + 1mcapitalize0m (class=string #args=1) Convert string's first character to uppercase. - cbrt + 1mcbrt0m (class=math #args=1) Cube root. 
- ceil + 1mceil0m (class=math #args=1) Ceiling: nearest integer at or above. - clean_whitespace + 1mclean_whitespace0m (class=string #args=1) Same as collapse_whitespace and strip. - collapse_whitespace + 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. - concat + 1mconcat0m (class=collections #args=variadic) Returns the array concatenation of the arguments. Non-array arguments are treated as single-element arrays. Examples: concat(1,2,3) is [1,2,3] concat([1,2],3) is [1,2,3] concat([1,2],[3]) is [1,2,3] - cos + 1mcos0m (class=math #args=1) Trigonometric cosine. - cosh + 1mcosh0m (class=math #args=1) Hyperbolic cosine. - depth + 1mdepth0m (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0. - dhms2fsec + 1mdhms2fsec0m (class=time #args=1) Recovers floating-point seconds as in dhms2fsec("5d18h53m20.250000s") = 500000.250000 - dhms2sec + 1mdhms2sec0m (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000 - erf + 1merf0m (class=math #args=1) Error function. - erfc + 1merfc0m (class=math #args=1) Complementary error function. - every + 1mevery0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for every array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. 
Examples: Array example: every(["a", "b", "c"], func(e) {return $[e] >= 0}) Map example: every({"a": "foo", "b": "bar"}, func(k,v) {return $[k] == v}) - exec + 1mexec0m (class=system #args=variadic) '$output = exec( "command", ["arg1", "arg2"], {"env": ["ENV_VAR=ENV_VALUE", "ENV_VAR2=ENV_VALUE2"], "dir": "/tmp/run_command_here", "stdin_string": "this is input fed to program", "combined_output": true )' Run a command via executable, path, args and environment, yielding its stdout minus final carriage return. Example: exec("echo", ["I don't do", "$SHELL things"], {"env": "SHELL=sh"}) outputs "I don't do $SHELL things" - exp + 1mexp0m (class=math #args=1) Exponential function e**x. - expm1 + 1mexpm10m (class=math #args=1) e**x - 1. - flatten + 1mflatten0m (class=collections #args=2,3) Flattens multi-level maps to single-level ones. Useful for nested JSON-like structures for non-JSON file formats like CSV. With two arguments, the first argument is a map (maybe $*) and the second argument is the flatten separator. With three arguments, the first argument is prefix, the second is the flatten separator, and the third argument is a map; flatten($*, ".") is the same as flatten("", ".", $*). See "Flatten/unflatten: converting between JSON and tabular formats" at https://miller.readthedocs.io for more information. Examples: flatten({"a":[1,2],"b":3}, ".") is {"a.1": 1, "a.2": 2, "b": 3}. flatten("a", ".", {"b": { "c": 4 }}) is {"a.b.c" : 4}. flatten("", ".", {"a": { "b": 3 }}) is {"a.b" : 3}. - float + 1mfloat0m (class=conversion #args=1) Convert int/float/bool/string to float. - floor + 1mfloor0m (class=math #args=1) Floor: nearest integer at or below. - fmtifnum + 1mfmtifnum0m (class=conversion #args=2) Identical to fmtnum, except returns the first argument as-is if the output would be an error. 
Examples: fmtifnum(3.4, "%.6f") gives 3.400000 fmtifnum("abc", "%.6f") gives abc $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone - fmtnum + 1mfmtnum0m (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. Example: $x = fmtnum($x, "%.6f") - fold + 1mfold0m (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument. Examples: Array example: fold([1,2,3,4,5], func(acc,e) {return acc + e**3}, 10000) returns 10225. Map example: fold({"a":1, "b":3, "c": 5}, func(acck,accv,ek,ev) {return {"sum": accv+ev**2}}, {"sum":10000}) returns 10035. - format + 1mformat0m (class=string #args=variadic) Using first argument as format string, interpolate remaining arguments in place of each "{}" in the format string. Too-few arguments are treated as the empty string; too-many arguments are discarded. Examples: format("{}:{}:{}", 1,2) gives "1:2:". format("{}:{}:{}", 1,2,3) gives "1:2:3". format("{}:{}:{}", 1,2,3,4) gives "1:2:3". 
- fsec2dhms + 1mfsec2dhms0m (class=time #args=1) Formats floating-point seconds as in fsec2dhms(500000.25) = "5d18h53m20.250000s" - fsec2hms + 1mfsec2hms0m (class=time #args=1) Formats floating-point seconds as in fsec2hms(5000.25) = "01:23:20.250000" - get_keys + 1mget_keys0m (class=collections #args=1) Returns array of keys of map or array - get_values + 1mget_values0m (class=collections #args=1) Returns array of values of map or array -- in the latter case, returns a copy of the array - gmt2localtime + 1mgmt2localtime0m (class=time #args=1,2) Convert from a GMT-time string to a local-time string. Consulting $TZ unless second argument is supplied. Examples: gmt2localtime("1999-12-31T22:00:00Z") = "2000-01-01 00:00:00" with TZ="Asia/Istanbul" gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" - gmt2sec + 1mgmt2sec0m (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch. Example: gmt2sec("2001-02-03T04:05:06Z") = 981173106 - gssub + 1mgssub0m (class=string #args=3) Like gsub but does no regexing. No characters are special. Example: gssub("ab.d.fg", ".", "X") gives "abXdXfg" - gsub + 1mgsub0m (class=string #args=3) '$name = gsub($name, "old", "new")': replace all, with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to gsub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. Examples: gsub("ababab", "ab", "XY") gives "XYXYXY" @@ -2315,244 +2315,244 @@ FUNCTIONS FOR FILTER/PUT gsub("abcdefg", "[ce]", "X") gives "abXdXfg" gsub("prefix4529:suffix8567", "(....ix)([0-9]+)", "[\1 : \2]") gives "[prefix : 4529]:[suffix : 8567]" - haskey + 1mhaskey0m (class=collections #args=2) True/false if map has/hasn't key, e.g. 'haskey($*, "a")' or 'haskey(mymap, mykey)', or true/false if array index is in bounds / out of bounds. 
Error if 1st argument is not a map or array. Note -n..-1 alias to 1..n in Miller arrays. - hexfmt + 1mhexfmt0m (class=conversion #args=1) Convert int to hex string, e.g. 255 to "0xff". - hms2fsec + 1mhms2fsec0m (class=time #args=1) Recovers floating-point seconds as in hms2fsec("01:23:20.250000") = 5000.250000 - hms2sec + 1mhms2sec0m (class=time #args=1) Recovers integer seconds as in hms2sec("01:23:20") = 5000 - hostname + 1mhostname0m (class=system #args=0) Returns the hostname as a string. - int + 1mint0m (class=conversion #args=1) Convert int/float/bool/string to int. - invqnorm + 1minvqnorm0m (class=math #args=1) Inverse of normal cumulative distribution function. Note that invqnorm(urand()) is normally distributed. - is_absent + 1mis_absent0m (class=typing #args=1) False if field is present in input, true otherwise - is_array + 1mis_array0m (class=typing #args=1) True if argument is an array. - is_bool + 1mis_bool0m (class=typing #args=1) True if field is present with boolean value. Synonymous with is_boolean. - is_boolean + 1mis_boolean0m (class=typing #args=1) True if field is present with boolean value. Synonymous with is_bool. - is_empty + 1mis_empty0m (class=typing #args=1) True if field is present in input with empty string value, false otherwise. - is_empty_map + 1mis_empty_map0m (class=typing #args=1) True if argument is a map which is empty. - is_error + 1mis_error0m (class=typing #args=1) True if argument is an error, such as taking string length of an integer. - is_float + 1mis_float0m (class=typing #args=1) True if field is present with value inferred to be float - is_int + 1mis_int0m (class=typing #args=1) True if field is present with value inferred to be int - is_map + 1mis_map0m (class=typing #args=1) True if argument is a map. - is_nan + 1mis_nan0m (class=typing #args=1) True if the argument is the NaN (not-a-number) floating-point value. Note that NaN has the property that NaN != NaN, so you need 'is_nan(x)' rather than 'x == NaN'. 
- is_nonempty_map + 1mis_nonempty_map0m (class=typing #args=1) True if argument is a map which is non-empty. - is_not_array + 1mis_not_array0m (class=typing #args=1) True if argument is not an array. - is_not_empty + 1mis_not_empty0m (class=typing #args=1) True if field is present in input with non-empty value, false otherwise - is_not_map + 1mis_not_map0m (class=typing #args=1) True if argument is not a map. - is_not_null + 1mis_not_null0m (class=typing #args=1) False if argument is null (empty, absent, or JSON null), true otherwise. - is_null + 1mis_null0m (class=typing #args=1) True if argument is null (empty, absent, or JSON null), false otherwise. - is_numeric + 1mis_numeric0m (class=typing #args=1) True if field is present with value inferred to be int or float - is_present + 1mis_present0m (class=typing #args=1) True if field is present in input, false otherwise. - is_string + 1mis_string0m (class=typing #args=1) True if field is present with string (including empty-string) value - joink + 1mjoink0m (class=conversion #args=2) Makes string from map/array keys. First argument is map/array; second is separator string. Examples: joink({"a":3,"b":4,"c":5}, ",") = "a,b,c". joink([1,2,3], ",") = "1,2,3". - joinkv + 1mjoinkv0m (class=conversion #args=3) Makes string from map/array key-value pairs. First argument is map/array; second is pair-separator string; third is field-separator string. Mnemonic: the "=" comes before the "," in the output and in the arguments to joinkv. Examples: joinkv([3,4,5], "=", ",") = "1=3,2=4,3=5" joinkv({"a":3,"b":4,"c":5}, ":", ";") = "a:3;b:4;c:5" - joinv + 1mjoinv0m (class=conversion #args=2) Makes string from map/array values. First argument is map/array; second is separator string. Examples: joinv([3,4,5], ",") = "3,4,5" joinv({"a":3,"b":4,"c":5}, ",") = "3,4,5" - json_parse + 1mjson_parse0m (class=collections #args=1) Converts value from JSON-formatted string. 
- json_stringify + 1mjson_stringify0m (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. - latin1_to_utf8 + 1mlatin1_to_utf80m (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it. Examples: $y = latin1_to_utf8($x) $* = latin1_to_utf8($*) - leafcount + 1mleafcount0m (class=collections #args=1) Counts total number of terminal values in map/array. For single-level map/array, same as length. - length + 1mlength0m (class=collections #args=1) Counts number of top-level entries in array/map. Scalars have length 1. - localtime2gmt + 1mlocaltime2gmt0m (class=time #args=1,2) Convert from a local-time string to a GMT-time string. Consults $TZ unless second argument is supplied. Examples: localtime2gmt("2000-01-01 00:00:00") = "1999-12-31T22:00:00Z" with TZ="Asia/Istanbul" localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" - localtime2sec + 1mlocaltime2sec0m (class=time #args=1,2) Parses local timestamp as integer seconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. Examples: localtime2sec("2001-02-03 04:05:06") = 981165906 with TZ="Asia/Istanbul" localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906 - log + 1mlog0m (class=math #args=1) Natural (base-e) logarithm. - log10 + 1mlog100m (class=math #args=1) Base-10 logarithm. - log1p + 1mlog1p0m (class=math #args=1) log(1+x). - logifit + 1mlogifit0m (class=math #args=3) Given m and b from logistic regression, compute fit: $yhat=logifit($x,$m,$b). - lstrip + 1mlstrip0m (class=string #args=1) Strip leading whitespace from string. - madd + 1mmadd0m (class=arithmetic #args=3) a + b mod m (integers) - mapdiff + 1mmapdiff0m (class=collections #args=variadic) With 0 args, returns empty map. With 1 arg, returns copy of arg.
With 2 or more, returns copy of arg 1 with all keys from any of remaining argument maps removed. - mapexcept + 1mmapexcept0m (class=collections #args=variadic) Returns a map with keys from remaining arguments, if any, unset. Remaining arguments can be strings or arrays of string. E.g. 'mapexcept({1:2,3:4,5:6}, 1, 5, 7)' is '{3:4}' and 'mapexcept({1:2,3:4,5:6}, [1, 5, 7])' is '{3:4}'. - mapselect + 1mmapselect0m (class=collections #args=variadic) Returns a map with only keys from remaining arguments set. Remaining arguments can be strings or arrays of string. E.g. 'mapselect({1:2,3:4,5:6}, 1, 5, 7)' is '{1:2,5:6}' and 'mapselect({1:2,3:4,5:6}, [1, 5, 7])' is '{1:2,5:6}'. - mapsum + 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. - max + 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. - md5 + 1mmd50m (class=hashing #args=1) MD5 hash. - mexp + 1mmexp0m (class=arithmetic #args=3) a ** b mod m (integers) - min + 1mmin0m (class=math #args=variadic) Min of n numbers; null loses. - mmul + 1mmmul0m (class=arithmetic #args=3) a * b mod m (integers) - msub + 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) - os + 1mos0m (class=system #args=0) Returns the operating-system name as a string. - pow + 1mpow0m (class=arithmetic #args=2) Exponentiation. Same as **, but as a function. - qnorm + 1mqnorm0m (class=math #args=1) Normal cumulative distribution function. - reduce + 1mreduce0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element, and return the accumulated element. 
For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is the first element for arrays, or the first element's key-value pair for maps. Examples: Array example: reduce([1,2,3,4,5], func(acc,e) {return acc + e**3}) returns 225. Map example: reduce({"a":1, "b":3, "c": 5}, func(acck,accv,ek,ev) {return {"sum_of_squares": accv + ev**2}}) returns {"sum_of_squares": 35}. - regextract + 1mregextract0m (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does. Examples: regextract("index ab09 file", "[a-z][a-z][0-9][0-9]") gives "ab09" regextract("index a999 file", "[a-z][a-z][0-9][0-9]") gives (absent), which will result in an assignment not happening. - regextract_or_else + 1mregextract_or_else0m (class=string #args=3) Like regextract but the third argument is the return value in case the input string (first argument) doesn't match the pattern (second argument). Examples: regextract_or_else("index ab09 file", "[a-z][a-z][0-9][0-9]", "nonesuch") gives "ab09" regextract_or_else("index a999 file", "[a-z][a-z][0-9][0-9]", "nonesuch") gives "nonesuch" - round + 1mround0m (class=math #args=1) Round to nearest integer. - roundm + 1mroundm0m (class=math #args=2) Round to nearest multiple of m: roundm($x,$m) is the same as round($x/$m)*$m. - rstrip + 1mrstrip0m (class=string #args=1) Strip trailing whitespace from string. - sec2dhms + 1msec2dhms0m (class=time #args=1) Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s" - sec2gmt + 1msec2gmt0m (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. 
Examples: sec2gmt(1234567890) = "2009-02-13T23:31:30Z" sec2gmt(1234567890.123456) = "2009-02-13T23:31:30Z" sec2gmt(1234567890.123456, 6) = "2009-02-13T23:31:30.123456Z" - sec2gmtdate + 1msec2gmtdate0m (class=time #args=1) Formats seconds since epoch (integer part) as GMT timestamp with year-month-date. Leaves non-numbers as-is. Example: sec2gmtdate(1440768801.7) = "2015-08-28". - sec2hms + 1msec2hms0m (class=time #args=1) Formats integer seconds as in sec2hms(5000) = "01:23:20" - sec2localdate + 1msec2localdate0m (class=time #args=1,2) Formats seconds since epoch (integer part) as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. Examples: sec2localdate(1440768801.7) = "2015-08-28" with TZ="Asia/Istanbul" sec2localdate(1440768801.7, "Asia/Istanbul") = "2015-08-28" - sec2localtime + 1msec2localtime0m (class=time #args=1,2,3) Formats seconds since epoch (integer part) as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part Examples: sec2localtime(1234567890) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" @@ -2560,31 +2560,31 @@ FUNCTIONS FOR FILTER/PUT sec2localtime(1234567890.123456, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" sec2localtime(1234567890.123456, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" - select + 1mselect0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, includes each input element in the output if the function returns true. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: Array example: select([1,2,3,4,5], func(e) {return e >= 3}) returns [3, 4, 5]. 
Map example: select({"a":1, "b":3, "c":5}, func(k,v) {return v >= 3}) returns {"b":3, "c": 5}. - sgn + 1msgn0m (class=math #args=1) +1, 0, -1 for positive, zero, negative input respectively. - sha1 + 1msha10m (class=hashing #args=1) SHA1 hash. - sha256 + 1msha2560m (class=hashing #args=1) SHA256 hash. - sha512 + 1msha5120m (class=hashing #args=1) SHA512 hash. - sin + 1msin0m (class=math #args=1) Trigonometric sine. - sinh + 1msinh0m (class=math #args=1) Hyperbolic sine. - sort + 1msort0m (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values. Examples: Default sorting: sort([3,"A",1,"B",22]) returns [1, 3, 22, "A", "B"]. @@ -2600,67 +2600,67 @@ FUNCTIONS FOR FILTER/PUT Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}. Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}. - splita + 1msplita0m (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on. Example: splita("3,4,5", ",") = [3,4,5] - splitax + 1msplitax0m (class=conversion #args=2) Splits string into array without type inference.
First argument is string to split; second is the separator to split on. Example: splitax("3,4,5", ",") = ["3","4","5"] - splitkv + 1msplitkv0m (class=conversion #args=3) Splits string by separators into map with type inference. First argument is string to split; second argument is pair separator; third argument is field separator. Example: splitkv("a=3,b=4,c=5", "=", ",") = {"a":3,"b":4,"c":5} - splitkvx + 1msplitkvx0m (class=conversion #args=3) Splits string by separators into map without type inference (keys and values are strings). First argument is string to split; second argument is pair separator; third argument is field separator. Example: splitkvx("a=3,b=4,c=5", "=", ",") = {"a":"3","b":"4","c":"5"} - splitnv + 1msplitnv0m (class=conversion #args=2) Splits string by separator into integer-indexed map with type inference. First argument is string to split; second argument is separator to split on. Example: splitnv("a,b,c", ",") = {"1":"a","2":"b","3":"c"} - splitnvx + 1msplitnvx0m (class=conversion #args=2) Splits string by separator into integer-indexed map without type inference (values are strings). First argument is string to split; second argument is separator to split on. Example: splitnvx("3,4,5", ",") = {"1":"3","2":"4","3":"5"} - sqrt + 1msqrt0m (class=math #args=1) Square root. - ssub + 1mssub0m (class=string #args=3) Like sub but does no regexing. No characters are special. Example: ssub("abc.def", ".", "X") gives "abcXdef" - strftime + 1mstrftime0m (class=time #args=2) Formats seconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. 
Examples: strftime(1440768801.7,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z" strftime(1440768801.7,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.700Z" - strftime_local + 1mstrftime_local0m (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. Examples: strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul" strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.700 +0300" with TZ="Asia/Istanbul" strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.700 +0300" - string + 1mstring0m (class=conversion #args=1) Convert int/float/bool/string/array/map to string. - strip + 1mstrip0m (class=string #args=1) Strip leading and trailing whitespace from string. - strlen + 1mstrlen0m (class=string #args=1) String length. - strptime + 1mstrptime0m (class=time #args=2) strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local. Examples: strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000 @@ -2668,7 +2668,7 @@ FUNCTIONS FOR FILTER/PUT strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400 strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 - strptime_local + 1mstrptime_local0m (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. Examples: strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul" @@ -2676,7 +2676,7 @@ FUNCTIONS FOR FILTER/PUT strptime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001 with TZ="Asia/Istanbul" strptime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001 - sub + 1msub0m (class=string #args=3) '$name = sub($name, "old", "new")': replace once (first match, if there are multiple matches), with support for regular expressions. 
Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to sub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. Examples: sub("ababab", "ab", "XY") gives "XYabab" @@ -2685,229 +2685,229 @@ FUNCTIONS FOR FILTER/PUT sub("abcdefg", "[ce]", "X") gives "abXdefg" sub("prefix4529:suffix8567", "suffix([0-9]+)", "name\1") gives "prefix4529:name8567" - substr + 1msubstr0m (class=string #args=3) substr is an alias for substr0. See also substr1. Miller is generally 1-up with all array and string indices, but, this is a backward-compatibility issue with Miller 5 and below. Arrays are new in Miller 6; the substr function is older. - substr0 + 1msubstr00m (class=string #args=3) substr0(s,m,n) gives substring of s from 0-up position m to n inclusive. Negative indices -len .. -1 alias to 0 .. len-1. See also substr and substr1. - substr1 + 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. - system + 1msystem0m (class=system #args=1) Run command string, yielding its stdout minus final carriage return. - systime + 1msystime0m (class=time #args=0) Returns the system time in floating-point seconds since the epoch. - systimeint + 1msystimeint0m (class=time #args=0) Returns the system time in integer seconds since the epoch. - tan + 1mtan0m (class=math #args=1) Trigonometric tangent. - tanh + 1mtanh0m (class=math #args=1) Hyperbolic tangent. - tolower + 1mtolower0m (class=string #args=1) Convert string to lowercase. - toupper + 1mtoupper0m (class=string #args=1) Convert string to uppercase. - truncate + 1mtruncate0m (class=string #args=2) Truncates string first argument to max length of int second argument. - typeof + 1mtypeof0m (class=typing #args=1) Convert argument to type of argument (e.g.
"str"). For debug. - unflatten + 1munflatten0m (class=collections #args=2) Reverses flatten. Useful for nested JSON-like structures for non-JSON file formats like CSV. The first argument is a map, and the second argument is the flatten separator. See also arrayify. See "Flatten/unflatten: converting between JSON and tabular formats" at https://miller.readthedocs.io for more information. Example: unflatten({"a.b.c" : 4}, ".") is {"a": {"b": { "c": 4 }}}. - unformat + 1munformat0m (class=string #args=2) Using first argument as format string, unpacks second argument into an array of matches, with type-inference. On non-match, returns error -- use is_error() to check. Examples: unformat("{}:{}:{}", "1:2:3") gives [1, 2, 3]. unformat("{}h{}m{}s", "3h47m22s") gives [3, 47, 22]. is_error(unformat("{}h{}m{}s", "3:47:22")) gives true. - unformatx + 1munformatx0m (class=string #args=2) Same as unformat, but without type-inference. Examples: unformatx("{}:{}:{}", "1:2:3") gives ["1", "2", "3"]. unformatx("{}h{}m{}s", "3h47m22s") gives ["3", "47", "22"]. is_error(unformatx("{}h{}m{}s", "3:47:22")) gives true. - uptime + 1muptime0m (class=time #args=0) Returns the time in floating-point seconds since the current Miller program was started. - urand + 1murand0m (class=math #args=0) Floating-point numbers uniformly distributed on the unit interval. Example: Int-valued example: '$n=floor(20+urand()*11)'. - urand32 + 1murand320m (class=math #args=0) Integer uniformly distributed between 0 and 2**32-1 inclusive. - urandelement + 1murandelement0m (class=math #args=1) Random sample from the first argument, which must be a non-empty array. - urandint + 1murandint0m (class=math #args=2) Integer uniformly distributed between inclusive integer endpoints. - urandrange + 1murandrange0m (class=math #args=2) Floating-point numbers uniformly distributed on the interval [a, b). - utf8_to_latin1 + 1mutf8_to_latin10m (class=string #args=1) Tries to convert UTF-8-encoded string to Latin-1-encoded string.
If argument is array or map, recurses into it. Examples: $y = utf8_to_latin1($x) $* = utf8_to_latin1($*) - version + 1mversion0m (class=system #args=0) Returns the Miller version as a string. - ! + 1m!0m (class=boolean #args=1) Logical negation. - != + 1m!=0m (class=boolean #args=2) String/numeric inequality. Mixing number and string results in string compare. - !=~ + 1m!=~0m (class=boolean #args=2) String (left-hand side) does not match regex (right-hand side), e.g. '$name !=~ "^a.*b$"'. - % + 1m%0m (class=arithmetic #args=2) Remainder; never negative-valued (pythonic). - & + 1m&0m (class=arithmetic #args=2) Bitwise AND. - && + 1m&&0m (class=boolean #args=2) Logical AND. - * + 1m*0m (class=arithmetic #args=2) Multiplication, with integer*integer overflow to float. - ** + 1m**0m (class=arithmetic #args=2) Exponentiation. Same as pow, but as an infix operator. - + + 1m+0m (class=arithmetic #args=1,2) Addition as binary operator; unary plus operator. - - + 1m-0m (class=arithmetic #args=1,2) Subtraction as binary operator; unary negation operator. - . + 1m.0m (class=string #args=2) String concatenation. Non-strings are coerced, so you can do '"ax".98' etc. - .* + 1m.*0m (class=arithmetic #args=2) Multiplication, with integer-to-integer overflow. - .+ + 1m.+0m (class=arithmetic #args=2) Addition, with integer-to-integer overflow. - .- + 1m.-0m (class=arithmetic #args=2) Subtraction, with integer-to-integer overflow. - ./ + 1m./0m (class=arithmetic #args=2) Integer division, rounding toward zero. - / + 1m/0m (class=arithmetic #args=2) Division. Integer / integer is integer when exact, else floating-point: e.g. 6/3 is 2 but 6/4 is 1.5. - // + 1m//0m (class=arithmetic #args=2) Pythonic integer division, rounding toward negative. - < + 1m<0m (class=boolean #args=2) String/numeric less-than. Mixing number and string results in string compare. - << + 1m<<0m (class=arithmetic #args=2) Bitwise left-shift. 
- <= + 1m<=0m (class=boolean #args=2) String/numeric less-than-or-equals. Mixing number and string results in string compare. - <=> + 1m<=>0m (class=boolean #args=2) Comparator, nominally for sorting. Given a <=> b, returns <0, 0, >0 as a < b, a == b, or a > b, respectively. - == + 1m==0m (class=boolean #args=2) String/numeric equality. Mixing number and string results in string compare. - =~ + 1m=~0m (class=boolean #args=2) String (left-hand side) matches regex (right-hand side), e.g. '$name =~ "^a.*b$"'. Capture groups \1 through \9 are matched from (...) in the right-hand side, and can be used within subsequent DSL statements. See also "Regular expressions" at https://miller.readthedocs.io. Examples: With if-statement: if ($url =~ "http.*com") { ... } Without if-statement: given $line = "index ab09 file", and $line =~ "([a-z][a-z])([0-9][0-9])", then $label = "[\1:\2]", $label is "[ab:09]" - > + 1m>0m (class=boolean #args=2) String/numeric greater-than. Mixing number and string results in string compare. - >= + 1m>=0m (class=boolean #args=2) String/numeric greater-than-or-equals. Mixing number and string results in string compare. - >> + 1m>>0m (class=arithmetic #args=2) Bitwise signed right-shift. - >>> + 1m>>>0m (class=arithmetic #args=2) Bitwise unsigned right-shift. - ?: + 1m?:0m (class=boolean #args=3) Standard ternary operator. - ?? + 1m??0m (class=boolean #args=2) Absent-coalesce operator. $a ?? 1 evaluates to 1 if $a isn't defined in the current record. - ??? + 1m???0m (class=boolean #args=2) Absent/empty-coalesce operator. $a ??? 1 evaluates to 1 if $a isn't defined in the current record, or has empty value. - ^ + 1m^0m (class=arithmetic #args=2) Bitwise XOR. - ^^ + 1m^^0m (class=boolean #args=2) Logical XOR. - | + 1m|0m (class=arithmetic #args=2) Bitwise OR. - || + 1m||0m (class=boolean #args=2) Logical OR. - ~ + 1m~0m (class=arithmetic #args=1) Bitwise NOT. Beware '$y=~$x' since =~ is the regex-match operator: try '$y = ~$x'. 
-KEYWORDS FOR PUT AND FILTER - all +1mKEYWORDS FOR PUT AND FILTER0m + 1mall0m all: used in "emit1", "emit", "emitp", and "unset" as a synonym for @* - begin + 1mbegin0m begin: defines a block of statements to be executed before input records are ingested. The body statements must be wrapped in curly braces. Example: 'begin { @count = 0 }' - bool + 1mbool0m bool: declares a boolean local variable in the current curly-braced scope. Type-checking happens at assignment: 'bool b = 1' is an error. - break + 1mbreak0m break: causes execution to continue after the body of the current for/while/do-while loop. - call + 1mcall0m call: used for invoking a user-defined subroutine. Example: 'subr s(k,v) { print k . " is " . v} call s("a", $a)' - continue + 1mcontinue0m continue: causes execution to skip the remaining statements in the body of the current for/while/do-while loop. For-loop increments are still applied. - do + 1mdo0m do: with "while", introduces a do-while loop. The body statements must be wrapped in curly braces. - dump + 1mdump0m dump: prints all currently defined out-of-stream variables immediately to stdout as JSON. @@ -2926,21 +2926,21 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump >> "mytap.dat"}' Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump | "jq .[]"}' - edump + 1medump0m edump: prints all currently defined out-of-stream variables immediately to stderr as JSON. Example: mlr --from f.dat put -q '@v[NR]=$*; end { edump }' - elif + 1melif0m elif: the way Miller spells "else if". The body statements must be wrapped in curly braces. - else + 1melse0m else: terminates an if/elif/elif chain. The body statements must be wrapped in curly braces. - emit1 + 1memit10m emit1: inserts an out-of-stream variable into the output record stream. Unlike the other map variants, side-by-sides, indexing, and redirection are not supported, but you can emit any map-valued expression. 
@@ -2950,7 +2950,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - emit + 1memit0m emit: inserts an out-of-stream variable into the output record stream. Hashmap indices present in the data but not slotted by emit arguments are not output. @@ -2981,7 +2981,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - emitf + 1memitf0m emitf: inserts non-indexed out-of-stream variable(s) side-by-side into the output record stream. @@ -3009,7 +3009,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - emitp + 1memitp0m emitp: inserts an out-of-stream variable into the output record stream. Hashmap indices present in the data but not slotted by emitp arguments are output concatenated with ":". @@ -3039,29 +3039,29 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - end + 1mend0m end: defines a block of statements to be executed after input records are ingested. The body statements must be wrapped in curly braces. Example: 'end { emit @count }' Example: 'end { eprint "Final count is " . @count }' - eprint + 1meprint0m eprint: prints expression immediately to stderr. Example: mlr --from f.dat put -q 'eprint "The sum of x and y is ".($x+$y)' Example: mlr --from f.dat put -q 'for (k, v in $*) { eprint k . " => " . v }' Example: mlr --from f.dat put '(NR % 1000 == 0) { eprint "Checkpoint ".NR}' - eprintn + 1meprintn0m eprintn: prints expression immediately to stderr, without trailing newline. Example: mlr --from f.dat put -q 'eprintn "The sum of x and y is ".($x+$y); eprint ""' - false + 1mfalse0m false: the boolean literal value. - filter + 1mfilter0m filter: includes/excludes the record in the output record stream.
Example: mlr --from f.dat put 'filter (NR == 2 || $x > 5.4)' @@ -3072,11 +3072,11 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q '@running_sum += $x * $y; emit @running_sum' - float + 1mfloat0m float: declares a floating-point local variable in the current curly-braced scope. Type-checking happens at assignment: 'float x = 0' is an error. - for + 1mfor0m for: defines a for-loop using one of three styles. The body statements must be wrapped in curly braces. For-loop over stream record: @@ -3093,71 +3093,71 @@ KEYWORDS FOR PUT AND FILTER Example: 'for (var i = 0, var b = 1; i < 10; i += 1, b *= 2) { ... }' - func + 1mfunc0m func: used for defining a user-defined function. Example: 'func f(a,b) { return sqrt(a**2+b**2)} $d = f($x, $y)' - funct + 1mfunct0m funct: used for saying that a function argument is a user-defined function. Example: 'func g(num a, num b, funct f) :num { return f(a**2+b**2) }' - if + 1mif0m if: starts an if/elif/elif chain. The body statements must be wrapped in curly braces. - in + 1min0m in: used in for-loops over stream records or out-of-stream variables. - int + 1mint0m int: declares an integer local variable in the current curly-braced scope. Type-checking happens at assignment: 'int x = 0.0' is an error. - map + 1mmap0m map: declares an map-valued local variable in the current curly-braced scope. Type-checking happens at assignment: 'map b = 0' is an error. map b = {} is always OK. map b = a is OK or not depending on whether a is a map. - num + 1mnum0m num: declares an int/float local variable in the current curly-braced scope. Type-checking happens at assignment: 'num b = true' is an error. - print + 1mprint0m print: prints expression immediately to stdout. Example: mlr --from f.dat put -q 'print "The sum of x and y is ".($x+$y)' Example: mlr --from f.dat put -q 'for (k, v in $*) { print k . " => " . 
v }' Example: mlr --from f.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}' - printn + 1mprintn0m printn: prints expression immediately to stdout, without trailing newline. Example: mlr --from f.dat put -q 'printn "."; end { print "" }' - return + 1mreturn0m return: specifies the return value from a user-defined function. Omitted return statements (including via if-branches) result in an absent-null return value, which in turns results in a skipped assignment to an LHS. - stderr + 1mstderr0m stderr: Used for tee, emit, emitf, emitp, print, and dump in place of filename to print to standard error. - stdout + 1mstdout0m stdout: Used for tee, emit, emitf, emitp, print, and dump in place of filename to print to standard output. - str + 1mstr0m str: declares a string local variable in the current curly-braced scope. Type-checking happens at assignment. - subr + 1msubr0m subr: used for defining a subroutine. Example: 'subr s(k,v) { print k . " is " . v} call s("a", $a)' - tee + 1mtee0m tee: prints the current record to specified file. This is an immediate print to the specified file (except for pprint format which of course waits until the end of the input stream to format all output). @@ -3183,10 +3183,10 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*' Example: mlr --from f.dat put -q --ojson 'tee | "gzip > /tmp/data-".$a.".gz", $*' - true + 1mtrue0m true: the boolean literal value. - unset + 1munset0m unset: clears field(s) from the current record, or an out-of-stream or local variable. Example: mlr --from f.dat put 'unset $x' @@ -3196,75 +3196,75 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put '...; unset @sums["green"]' Example: mlr --from f.dat put '...; unset @*' - var + 1mvar0m var: declares an untyped local variable in the current curly-braced scope. Examples: 'var a=1', 'var xyz=""' - while + 1mwhile0m while: introduces a while loop, or with "do", introduces a do-while loop. 
The body statements must be wrapped in curly braces. - ENV + 1mENV0m ENV: access to environment variables by name, e.g. '$home = ENV["HOME"]' - FILENAME + 1mFILENAME0m FILENAME: evaluates to the name of the current file being processed. - FILENUM + 1mFILENUM0m FILENUM: evaluates to the number of the current file being processed, starting with 1. - FNR + 1mFNR0m FNR: evaluates to the number of the current record within the current file being processed, starting with 1. Resets at the start of each file. - IFS + 1mIFS0m IFS: evaluates to the input field separator from the command line. - IPS + 1mIPS0m IPS: evaluates to the input pair separator from the command line. - IRS + 1mIRS0m IRS: evaluates to the input record separator from the command line, or to LF or CRLF from the input data if in autodetect mode (which is the default). - M_E + 1mM_E0m M_E: the mathematical constant e. - M_PI + 1mM_PI0m M_PI: the mathematical constant pi. - NF + 1mNF0m NF: evaluates to the number of fields in the current record. - NR + 1mNR0m NR: evaluates to the number of the current record over all files being processed, starting with 1. Does not reset at the start of each file. - OFS + 1mOFS0m OFS: evaluates to the output field separator from the command line. - OPS + 1mOPS0m OPS: evaluates to the output pair separator from the command line. - ORS + 1mORS0m ORS: evaluates to the output record separator from the command line, or to LF or CRLF from the input data if in autodetect mode (which is the default). -AUTHOR +1mAUTHOR0m Miller is written by John Kerl
@@ -95,9 +89,6 @@ purple triangle false 7 6500 80.1405 5.8240 466.738272 yellow circle true 8 7300 63.9785 4.2370 271.0769045 yellow circle true 9 8700 63.5058 8.3350 529.3208430000001 purple square false 10 9100 72.3735 8.2430 596.5747605000001 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamOne of Miller's key features is the ability to express data-transformation right there at the keyboard, interactively. But if you find yourself using expressions repeatedly, you can put everything between the single quotes into a file and refer to that using `put -f`: @@ -125,9 +116,6 @@ purple triangle false 7 6500 80.1405 5.8240 466.738272 yellow circle true 8 7300 63.9785 4.2370 271.0769045 yellow circle true 9 8700 63.5058 8.3350 529.3208430000001 purple square false 10 9100 72.3735 8.2430 596.5747605000001 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream This becomes particularly important on Windows. Quite a bit of effort was put into making Miller on Windows be able to handle the kinds of single-quoted expressions we're showing here, but if you get syntax-error messages on Windows using examples in this documentation, you can put the parts between single quotes into a file and refer to that using `mlr put -f` -- or, use the triple-double-quote trick as described in the [Miller on Windows page](miller-on-windows.md). @@ -158,9 +146,6 @@ purple square false 10 91 72.3735 8.2430 sum 652.7185 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream If you want the end-block output to be the only output, and not include the records from the input data, you can use `mlr put -q`: @@ -171,9 +156,6 @@ If you want the end-block output to be the only output, and not include the reco
sum 652.7185 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -185,9 +167,6 @@ go tool pprof -http=:8080 foo-stream "sum": 652.7185 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -205,9 +184,6 @@ go tool pprof -http=:8080 foo-stream "sum": 652.7185 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamWe'll see in the documentation for [stats1](reference-verbs.md#stats1) that there's a lower-keystroking way to get counts and sums of things: @@ -222,9 +198,6 @@ We'll see in the documentation for [stats1](reference-verbs.md#stats1) that ther "quantity_count": 10 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream So, take this sum/count example as an indication of the kinds of things you can do using Miller's programming language. @@ -276,9 +249,6 @@ a b c nf nr fnr filename filenum newnf 1 2 3 3 1 1 data/a.csv 1 8 4 5 6 3 2 2 data/a.csv 1 8 7 8 9 3 3 1 data/b.csv 2 8 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Functions and local variables @@ -313,9 +283,6 @@ purple triangle false 7 65 80.1405 5.8240 5040 yellow circle true 8 73 63.9785 4.2370 40320 yellow circle true 9 87 63.5058 8.3350 362880 purple square false 10 91 72.3735 8.2430 3628800 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Note that here we used the `-f` flag to `put` to load our function @@ -353,9 +320,6 @@ end {
count_of_red sum_of_red 4 247.84139999999996 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamMiller's else-if is spelled `elif`. @@ -386,9 +350,6 @@ print a,b,c 1,2,3 4,5,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -403,9 +364,6 @@ KEY IS a VALUE IS 4 KEY IS b VALUE IS 5 KEY IS c VALUE IS 6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamHere we used the local variables `k` and `v`. Now we've seen four kinds of variables: @@ -458,9 +416,6 @@ For example, you can sum up all the `$a` values across records without having to "b": 5 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -483,7 +438,4 @@ go tool pprof -http=:8080 foo-stream "sum_of_a": 5 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/misc-examples.md b/docs/src/misc-examples.md index 5720c6853f..0a528d4368 100644 --- a/docs/src/misc-examples.md +++ b/docs/src/misc-examples.md @@ -177,14 +177,9 @@ And, suppose you want to compute the differences in the counters between adjacen First, rename counter columns to make them distinct: -
+mlr --csv rename count,previous_count data/previous_counters.csv > data/prevtemp.csv--Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -cat data/prevtemp.csv @@ -197,14 +192,9 @@ orange,694 purple,12-+## Data processing @@ -229,9 +226,6 @@ For example (see [https://github.com/johnkerl/miller/issues/178](https://github. "a": "0123" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streammlr --csv rename count,current_count data/current_counters.csv > data/currtemp.csv--Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -cat data/currtemp.csv @@ -233,9 +223,6 @@ orange 694 670 -24 yellow 0 27 (error) blue 6838 6944 106 purple 12 0 (error) -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSee also the [record-heterogeneity page](record-heterogeneity.md). diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md index 17f6f63bb0..13e17a06ff 100644 --- a/docs/src/new-in-miller-6.md +++ b/docs/src/new-in-miller-6.md @@ -165,9 +165,6 @@ purple,square,false,10,91,72.3735,8.2430 yellow,triangle,true,1,11,43.6498,9.8870 purple,triangle,false,5,51,81.2290,8.5910 purple,triangle,false,7,65,80.1405,5.8240 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -244,9 +238,6 @@ go tool pprof -http=:8080 foo-stream "y": 1.230000000 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream### Deduping of repeated field names @@ -348,9 +339,6 @@ This works in Miller 6 (and worked in Miller 5 as well) and is supported:
input=1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamPlease see the [section on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) diff --git a/docs/src/operating-on-all-fields.md b/docs/src/operating-on-all-fields.md index 0d9df35718..452f4486d9 100644 --- a/docs/src/operating-on-all-fields.md +++ b/docs/src/operating-on-all-fields.md @@ -40,9 +40,6 @@ a_b_c,def,g_h_i 123,4567,890 2468,1357,3579 9987,3312,4543 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -53,9 +50,6 @@ a_b_c def g_h_i 123 4567 890 2468 1357 3579 9987 3312 4543 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamYou can also do this with a for-loop: @@ -79,9 +73,6 @@ a_b_c def g_h_i 123 4567 890 2468 1357 3579 9987 3312 4543 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Bulk rename of fields with carriage returns @@ -115,9 +106,6 @@ field A,field B 1,2 3,3 6,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Search-and-replace over all fields @@ -149,9 +137,6 @@ for (k in $*) { a,b,c thX quick,brown fox,jumpXd ovXr,thX,lazy dogs -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Full field renames and reassigns @@ -192,7 +177,4 @@ z=0.758679,KEYFIELD=eks,i=3,b=pan,y=0.758679,x=0.522151 z=0.204603,KEYFIELD=wye,i=6,b=wye,y=0.204603,x=0.338318 z=0.381399,KEYFIELD=eks,i=10,b=wye,y=0.381399,x=0.134188 z=0.573288,KEYFIELD=wye,i=15,b=pan,y=0.573288,x=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream diff --git a/docs/src/operating-on-all-records.md b/docs/src/operating-on-all-records.md index 483dba3190..6663f1c18a 100644 --- a/docs/src/operating-on-all-records.md +++ b/docs/src/operating-on-all-records.md @@ -86,9 +86,6 @@ after all the input is read. "sum": 119 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream And if all we want is the final output and not the input data, we can use `put @@ -114,9 +111,6 @@ And if all we want is the final output and not the input data, we can use `put "sum": 119 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream As discussed a bit more on the page on [streaming processing and memory @@ -179,9 +173,6 @@ cat,54 "sum": 119 } ] -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream The downside to this, of course, is that this retains all records (plus data-structure overhead) in memory, so you're limited to processing files that fit in your computer's memory. The upside, though, is that you can do random access over the records using things like @@ -241,9 +232,6 @@ The third option is to retain records in an [array](reference-main-arrays.md), t "sum": 119 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Just as with the retain-as-map approach, the downside is the overhead of @@ -288,9 +276,6 @@ array will have [null-gaps](reference-main-arrays.md) in it: ] [ ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream You can index `@records` by `@count` rather than `NR` to get a contiguous array: @@ -334,9 +319,6 @@ You can index `@records` by `@count` rather than `NR` to get a contiguous array: "sum": 91 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream If you use a map to retain records, then this is a non-issue: maps can retain whatever values you like: @@ -378,9 +360,6 @@ If you use a map to retain records, then this is a non-issue: maps can retain wh "sum": 91 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Do note that Miller [maps](reference-main-maps.md) preserve insertion order, so @@ -425,9 +404,6 @@ interested in: "sum": 91 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Sorting diff --git a/docs/src/parsing-and-formatting-fields.md b/docs/src/parsing-and-formatting-fields.md index d9f13f1b24..1f2d5426e1 100644 --- a/docs/src/parsing-and-formatting-fields.md +++ b/docs/src/parsing-and-formatting-fields.md @@ -53,9 +53,6 @@ Robert,"Bob,Bobby,Biker","2,4,6" "codes": "2,4,6" } ] -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream Then we can use the [`splita`](reference-dsl-builtin-functions.md#splita) function to split the @@ -77,9 +74,6 @@ Then we can use the [`splita`](reference-dsl-builtin-functions.md#splita) functi "codes": "2,4,6" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Likewise we can split the `codes` field. Since these look like numbers, we can again use `splita` @@ -103,9 +97,6 @@ substrings, with no type inference: "codes": [2, 4, 6] } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -124,9 +115,6 @@ go tool pprof -http=:8080 foo-stream "codes": ["2", "4", "6"] } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamWe can do operations on the array, then use [joinv](reference-dsl-builtin-functions.md#joinv) to put them @@ -152,9 +140,6 @@ back together: "codes": "200,400,600" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -168,9 +153,6 @@ go tool pprof -http=:8080 foo-stream name,nicknames,codes Alice,"Allie,Skater","100,300,500" Robert,"Bob,Bobby,Biker","200,400,600" -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe full list of split functions includes @@ -213,9 +195,6 @@ host,status xy01.east,up ab02.west,down ac91.west,up -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Flatten/unflatten: representing arrays in CSV @@ -240,9 +219,6 @@ _flatten/unflatten strategy_: array-valued fields are turned into multiple CSV c "codes": ["2", "4", "6"] } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -252,9 +228,6 @@ go tool pprof -http=:8080 foo-stream name,nicknames,codes.1,codes.2,codes.3 Alice,"Allie,Skater",1,3,5 Robert,"Bob,Bobby,Biker",2,4,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSee the [flatten/unflatten: converting between JSON and tabular formats¶](flatten-unflatten.md) @@ -306,9 +279,6 @@ stamp,event "pieces": [5, 19, "07", 56] } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -323,9 +293,6 @@ stamp event description 5-18:53:22 close 5 day(s) 18 hour(s) 53 minute(s) 22 seconds(s) 5-19:07:34 open 5 day(s) 19 hour(s) 07 minute(s) 34 seconds(s) 5-19:07:56 close 5 day(s) 19 hour(s) 07 minute(s) 56 seconds(s) -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Using regular expressions and capture groups @@ -345,9 +312,6 @@ stamp event description 5-18:53:22 close 5 day(s) 18 hour(s) 53 minute(s) 22 seconds(s) 5-19:07:34 open 5 day(s) 19 hour(s) 07 minute(s) 34 seconds(s) 5-19:07:56 close 5 day(s) 19 hour(s) 07 minute(s) 56 seconds(s) -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Special case: timestamps @@ -373,9 +337,6 @@ sec dhms 100 1m40s 10000 2h46m40s 1000000 11d13h46m40s -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Please see @@ -392,9 +353,6 @@ One way to handle currencies is to sub out the currency marker (like `$`) as wel
d=1234.56 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Nesting and unnesting fields @@ -410,9 +368,6 @@ For example: name nicknames codes Alice Allie,Skater 1,3,5 Robert Bob,Bobby,Biker 2,4,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -425,9 +380,6 @@ Alice Skater 1,3,5 Robert Bob 2,4,6 Robert Bobby 2,4,6 Robert Biker 2,4,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSee [documentation on the nest verb](reference-verbs.md#nest) for general information on how to do this. diff --git a/docs/src/programming-examples.md b/docs/src/programming-examples.md index e2f92944af..a8e42db583 100644 --- a/docs/src/programming-examples.md +++ b/docs/src/programming-examples.md @@ -89,9 +89,6 @@ end { 83 89 97 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Mandelbrot-set generator @@ -231,9 +228,6 @@ CHARS = @X*o-. @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream But using a very small font size (as small as my Mac will let me go), and by choosing the coordinates to zoom in on a particular part of the complex plane, we can get a nice little picture: diff --git a/docs/src/questions-about-joins.md b/docs/src/questions-about-joins.md index 32d1aec1de..b8bde2d46d 100644 --- a/docs/src/questions-about-joins.md +++ b/docs/src/questions-about-joins.md @@ -30,9 +30,6 @@ hostname ipaddr nadir.east.our.org 10.3.1.18 zenith.west.our.org 10.3.1.27 apoapsis.east.our.org 10.4.5.94 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -49,9 +46,6 @@ ipaddr timestamp bytes 10.3.1.27 1448762599 0 10.3.1.18 1448762598 73425 10.4.5.94 1448762599 12200 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -63,9 +57,6 @@ ipaddr hostname timestamp bytes 10.4.5.94 apoapsis.east.our.org 1448762579 17445 10.4.5.94 apoapsis.east.our.org 1448762589 8899 10.4.5.94 apoapsis.east.our.org 1448762599 12200 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe issue is that Miller's `join`, by default (before 5.1.0), took input sorted (lexically ascending) by the sort keys on both the left and right files. This design decision was made intentionally to parallel the Unix/Linux system `join` command, which has the same semantics. The benefit of this default is that the joiner program can stream through the left and right files, needing to load neither entirely into memory. The drawback, of course, is that is requires sorted input. @@ -86,9 +77,6 @@ ipaddr hostname timestamp bytes 10.3.1.27 zenith.west.our.org 1448762599 0 10.3.1.18 nadir.east.our.org 1448762598 73425 10.4.5.94 apoapsis.east.our.org 1448762599 12200 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream General advice is to make sure the left-file is relatively small, e.g. containing name-to-number mappings, while saving large amounts of data for the right file. @@ -119,9 +107,6 @@ Joining on color the results are as expected: id,code,color 4,ff0000,red 2,00ff00,green -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream However, if we ask for left-unpaireds, since there's no `color` column, we get a row not having the same column names as the other: @@ -136,9 +121,6 @@ id,code,color id,code 3,0000ff -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream To fix this, we can use **unsparsify**: @@ -153,9 +135,6 @@ id,code,color 4,ff0000,red 2,00ff00,green 3,0000ff, -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Thanks to @aborruso for the tip! 
@@ -220,7 +199,4 @@ id status name task 20 idle Carol mix 10 idle Bob knead 30 occupied Alice clean -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream diff --git a/docs/src/questions-about-then-chaining.md b/docs/src/questions-about-then-chaining.md index b1c9370517..7f558ee8a4 100644 --- a/docs/src/questions-about-then-chaining.md +++ b/docs/src/questions-about-then-chaining.md @@ -45,9 +45,6 @@ paid cash 2 pending debit 1 pending credit 1 paid debit 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream After that, run it with the next `then` step included: @@ -62,9 +59,6 @@ paid cash 2 pending debit 1 pending credit 1 paid debit 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Now if you use `then` to include another verb after that, the columns `Status`, `Payment_Type`, and `count` will be the input to that verb. @@ -81,12 +75,6 @@ paid cash 2 pending debit 1 pending credit 1 paid debit 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## NR is not consecutive after then-chaining @@ -112,9 +100,6 @@ why don't I see `NR=1` and `NR=2` here??
a=eks,b=pan,i=2,x=0.758679,y=0.522151,NR=2 a=wye,b=pan,i=5,x=0.573288,y=0.863624,NR=5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe reason is that `NR` is computed for the original input records and isn't dynamically updated. By contrast, `NF` is dynamically updated: it's the number of fields in the current record, and if you add/remove a field, the value of `NF` will change: @@ -124,9 +109,6 @@ The reason is that `NR` is computed for the original input records and isn't dyn
nf1=3,u=4,nf2=5,nf3=3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream`NR`, by contrast (and `FNR` as well), retains the value from the original input stream, and records may be dropped by a `filter` within a `then`-chain. To recover consecutive record numbers, you can use out-of-stream variables as follows: @@ -148,9 +130,6 @@ go tool pprof -http=:8080 foo-stream a b i x y nr1 nr2 eks pan 2 0.758679 0.522151 2 1 wye pan 5 0.573288 0.863624 5 2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Or, simply use `mlr cat -n`: @@ -161,7 +140,4 @@ Or, simply use `mlr cat -n`:
n=1,a=eks,b=pan,i=2,x=0.758679,y=0.522151 n=2,a=wye,b=pan,i=5,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/randomizing-examples.md b/docs/src/randomizing-examples.md index e30e5a1895..3a389e9a75 100644 --- a/docs/src/randomizing-examples.md +++ b/docs/src/randomizing-examples.md @@ -117,9 +117,6 @@ bin_lo bin_hi u_count s_count 1.88 1.92 [64]#...................[9554] [326]#...................[3703] 1.92 1.96 [64]#...................[9554] [326]#...................[3703] 1.96 2 [64]#...................[9554] [326]#...................[3703] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Randomly selecting words from a list diff --git a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md index ec35404802..ba80fc3c09 100644 --- a/docs/src/record-heterogeneity.md +++ b/docs/src/record-heterogeneity.md @@ -41,9 +41,6 @@ a,b,c 1,2,3 4,5,6 7,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream It has three records (written here using JSON Lines formatting): @@ -55,9 +52,6 @@ It has three records (written here using JSON Lines formatting): {"a": 1, "b": 2, "c": 3} {"a": 4, "b": 5, "c": 6} {"a": 7, "b": 8, "c": 9} -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Here every row has the same keys, in the same order: `a,b,c`. @@ -72,9 +66,6 @@ a b c 1 2 3 4 5 6 7 8 9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ### Fillable data @@ -89,9 +80,6 @@ a,b,c 1,2,3 4,,6 ,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -101,9 +89,6 @@ go tool pprof -http=:8080 foo-stream {"a": 1, "b": 2, "c": 3} {"a": 4, "b": "", "c": 6} {"a": "", "b": 8, "c": 9} -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThis example is still homogeneous, though: every row has the same keys, in the same order: `a,b,c`. @@ -120,9 +105,6 @@ a b c 1 2 3 4 filler 6 filler 8 9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ### Ragged data @@ -180,9 +162,6 @@ with 1) for too-long rows: "4": 10 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ### Irregular data @@ -220,9 +199,6 @@ the keys: {"a": 1, "b": 2, "c": 3} {"a": 4, "b": 5, "c": 6} {"a": 7, "b": 8, "c": 9} -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The `regularize` verb tries to re-order subsequent rows to look like the first @@ -256,9 +232,6 @@ data for items which are present, but won't log data for items which aren't. "reimaged": true } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream This data is called **sparse** (from the [data-storage term](https://en.wikipedia.org/wiki/Sparse_matrix)). @@ -293,9 +266,6 @@ every record has the same keys: "reimaged": true } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Since this data is now homogeneous (rectangular), it pretty-prints nicely: @@ -308,9 +278,6 @@ host status volume purpose reimaged xy01.east running /dev/sda1 - - xy92.west running - - - xy55.east - /dev/sda1 failover true -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Reading and writing heterogeneous data @@ -350,9 +317,6 @@ For these formats, record-heterogeneity comes naturally: xy01.east running /dev/sda1 xy92.west running failover xy55.east /dev/sda1 true -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -370,9 +334,6 @@ purpose failover host xy55.east volume /dev/sda1 reimaged true -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -382,9 +343,6 @@ go tool pprof -http=:8080 foo-stream host=xy01.east,status=running,volume=/dev/sda1 host=xy92.west,status=running purpose=failover,host=xy55.east,volume=/dev/sda1,reimaged=true -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamEven then, we may wish to put like with like, using the [`group-like`](reference-verbs.md#group-like) verb: @@ -398,9 +356,6 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -412,9 +367,6 @@ resource=/path/to/second/file,loadsec=0.32,ok=true resource=/some/other/path,loadsec=0.97,ok=false record_count=100,resource=/path/to/file record_count=150,resource=/path/to/second/file -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream### Rectangular file formats: CSV and pretty-print @@ -477,9 +429,6 @@ record_count resource resource loadsec ok /some/other/path 0.97 false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -494,9 +443,6 @@ resource loadsec ok record_count resource 100 /path/to/file 150 /path/to/second/file -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamMiller handles explicit header changes as just shown. If your CSV input contains ragged data -- if there are implicit header changes (no intervening blank line and new header line) as seen above -- you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`). @@ -511,9 +457,6 @@ a,b,c a,b,c,4 7,8,9,10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Processing heterogeneous data @@ -550,7 +493,4 @@ count=300,color=blue count=450 count=500,color=green count=600 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 6d803f1b89..bbb65a5ebb 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -37,9 +37,6 @@ PURPLE tr**ngl* false 7 65 80.1405 5.8240 YELLOW c*rcl* true 8 73 63.9785 4.2370 YELLOW c*rcl* true 9 87 63.5058 8.3350 PURPLE sq**r* false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream the `toupper` and `gsub` bits are _functions_. diff --git a/docs/src/reference-dsl-control-structures.md b/docs/src/reference-dsl-control-structures.md index 032f2336ba..16de01613b 100644 --- a/docs/src/reference-dsl-control-structures.md +++ b/docs/src/reference-dsl-control-structures.md @@ -29,9 +29,6 @@ x=0 x=1 x=2 x=3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -43,9 +40,6 @@ x=0 x=1,y=0,z=0 x=2,y=0.3010299956639812,z=0.5486620049392715 x=3,y=0.4771212547196624,z=0.6907396432228734 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -55,9 +49,6 @@ go tool pprof -http=:8080 foo-stream a=abc_123 a=some other name a=xyz_789 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -71,9 +62,6 @@ go tool pprof -http=:8080 foo-stream a=abc_123,b=left_abc,c=right_123 a=some other name a=xyz_789,b=left_xyz,c=right_789 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThis produces heteregenous output which Miller, of course, has no problems with (see [Record Heterogeneity](record-heterogeneity.md)). But if you want homogeneous output, the curly braces can be replaced with a semicolon between the expression and the body statements. This causes `put` to evaluate the boolean expression (along with any side effects, namely, regex-captures `\1`, `\2`, etc.) but doesn't use it as a criterion for whether subsequent assignments should be executed. Instead, subsequent assignments are done unconditionally: @@ -90,9 +78,6 @@ a b c abc_123 left_abc right_123 some other name left_ right_ xyz_789 left_xyz right_789 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Note that pattern-action blocks are just a syntactic variation of if-statements. The following do the same thing: @@ -151,9 +136,6 @@ Miller's `while` and `do-while` are unsurprising in comparison to various langua
x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -169,9 +151,6 @@ go tool pprof -http=:8080 foo-stream
x=1,y=2,3=,4=,5=,foo=bar -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamA `break` or `continue` within nested conditional blocks or if-statements will, @@ -240,9 +219,6 @@ NR = 5 key: i value: 5 key: x value: 0.573288 key: y value: 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -258,9 +234,6 @@ go tool pprof -http=:8080 foo-stream### Key-value for-loops @@ -324,9 +294,6 @@ label1 label2 f1 f2 f3 sum1 sum2 sum3 blue green 100 240 350 690 690 690 red green 120 11 195 326 326 326 yellow blue 140 0 240 380 380 380 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamkey: a valuetype: int key: b valuetype: map -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that the value corresponding to a given key may be gotten as through a **computed field name** using square brackets as in `$[e]` for stream records, or by indexing the looped-over variable using square brackets. @@ -283,9 +256,6 @@ value: 20 valuetype: string value: {} valuetype: map value: four valuetype: string value: true valuetype: bool -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -339,9 +306,6 @@ eks pan 2 0.758679 0.522151 string string int float float wye wye 3 0.204603 0.338318 string string int float float eks wye 4 0.381399 0.134188 string string int float float wye pan 5 0.573288 0.863624 string string int float float -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that the value of the current field in the for-loop can be gotten either using the bound variable `value`, or through a **computed field name** using square brackets as in `$[key]`. @@ -367,9 +331,6 @@ eks pan 2 0.758679 0.522151 3.28083 13.12332 wye wye 3 0.204603 0.338318 3.542921 14.171684 eks wye 4 0.381399 0.134188 4.515587 18.062348 wye pan 5 0.573288 0.863624 6.4369119999999995 25.747647999999998 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream It can be confusing to modify the stream record while iterating over a copy of it, so instead you might find it simpler to use a local variable in the loop and only update the stream record after the loop: @@ -392,9 +353,6 @@ eks pan 2 0.758679 0.522151 3.28083 wye wye 3 0.204603 0.338318 3.542921 eks wye 4 0.381399 0.134188 4.515587 wye pan 5 0.573288 0.863624 6.4369119999999995 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream You can also start iterating on sub-maps of an out-of-stream or local variable; you can loop over nested keys; you can loop over all out-of-stream variables. The bound variables are bound to a copy of the sub-map as it was before the loop started. The sub-map is specified by square-bracketed indices after `in`, and additional deeper indices are bound to loop key-variables. The terminal values are bound to the loop value-variable whenever the keys are not too shallow. The value-variable may refer to a terminal (string, number) or it may be map-valued if the map goes deeper. 
Example indexing is as follows: @@ -438,9 +396,6 @@ That's confusing in the abstract, so a concrete example is in order. Suppose the } } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Then we can get at various values as follows: @@ -467,9 +422,6 @@ Then we can get at various values as follows: key=1,valuetype=int key=3,valuetype=map key=6,valuetype=map -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -494,9 +446,6 @@ go tool pprof -http=:8080 foo-streamkey1=3,key2=4,valuetype=int key1=6,key2=7,valuetype=map -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -520,9 +469,6 @@ go tool pprof -http=:8080 foo-streamkey1=7,key2=8,valuetype=int -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream### C-style triple-for loops @@ -545,9 +491,6 @@ eks pan 2 0.758679 0.522151 3 wye wye 3 0.204603 0.338318 6 eks wye 4 0.381399 0.134188 10 wye pan 5 0.573288 0.863624 15 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -569,9 +512,6 @@ eks pan 2 0.758679 0.522151 3 3 wye wye 3 0.204603 0.338318 6 7 eks wye 4 0.381399 0.134188 10 15 wye pan 5 0.573288 0.863624 15 31 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNotes: @@ -604,9 +544,6 @@ a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 x_sum=2.26476 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Since uninitialized out-of-stream variables default to 0 for addition/subtraction and 1 for multiplication when they appear on expression right-hand sides (not quite as in `awk`, where they'd default to 0 either way), the above can be written more succinctly as @@ -624,9 +561,6 @@ a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 x_sum=2.26476 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The **put -q** option suppresses printing of each output record, with only `emit` statements being output. So to get only summary outputs, you could write @@ -639,9 +573,6 @@ The **put -q** option suppresses printing of each output record, with only `emit
x_sum=2.26476 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamWe can do similarly with multiple out-of-stream variables: @@ -659,9 +590,6 @@ We can do similarly with multiple out-of-stream variables:
x_count=5 x_sum=2.26476 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThis is of course (see also [here](reference-dsl.md#verbs-compared-to-dsl)) not much different than @@ -671,9 +599,6 @@ This is of course (see also [here](reference-dsl.md#verbs-compared-to-dsl)) not
x_count=5,x_sum=2.26476 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that it's a syntax error for begin/end blocks to refer to field names (beginning with `$`), since begin/end blocks execute outside the context of input records. diff --git a/docs/src/reference-dsl-differences.md b/docs/src/reference-dsl-differences.md index 430d2dec20..5a9ef30157 100644 --- a/docs/src/reference-dsl-differences.md +++ b/docs/src/reference-dsl-differences.md @@ -44,7 +44,7 @@ semicolon where one is needed . The parser tries to remind you about semicolons whenever there's a chance a missing semicolon might be involved in a parse error. -
+mlr --csv --from example.csv put -q ' begin { @count = 0 # No semicolon required -- before closing curly brace @@ -52,11 +52,6 @@ error. $x=1 # No semicolon required -- at end of expression '--Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -mlr --csv --from example.csv put -q ' @@ -176,9 +171,6 @@ avoid this, use the dot operator for string-concatenation instead.[ a b c ] [abc] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSimilarly, a final newline is printed for you; use [`printn`](reference-dsl-output-statements.md#print-statements) to avoid this. @@ -230,9 +222,6 @@ word,value apple,37 ball,28 cat,54 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -249,9 +238,6 @@ go tool pprof -http=:8080 foo-stream Record 1 has word apple Record 2 has word ball Record 3 has word cat -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamAlso, slices for arrays and strings are _doubly inclusive_: `x[3:5]` gets you diff --git a/docs/src/reference-dsl-filter-statements.md b/docs/src/reference-dsl-filter-statements.md index d8d289e90f..0a2de3dd32 100644 --- a/docs/src/reference-dsl-filter-statements.md +++ b/docs/src/reference-dsl-filter-statements.md @@ -25,9 +25,6 @@ You can use the `filter` DSL keyword within the `put` verb. In fact, the followi color,shape,flag,k,index,quantity,rate red,square,true,2,15,79.2778,0.0130 red,circle,true,3,16,13.8103,2.9010 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -37,9 +34,6 @@ go tool pprof -http=:8080 foo-stream color,shape,flag,k,index,quantity,rate red,square,true,2,15,79.2778,0.0130 red,circle,true,3,16,13.8103,2.9010 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe former, of course, is a little easier to type. For another example: @@ -52,9 +46,6 @@ color,shape,flag,k,index,quantity,rate yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -65,7 +56,4 @@ color,shape,flag,k,index,quantity,rate yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/reference-dsl-higher-order-functions.md b/docs/src/reference-dsl-higher-order-functions.md index 6eafa832aa..7861b646be 100644 --- a/docs/src/reference-dsl-higher-order-functions.md +++ b/docs/src/reference-dsl-higher-order-functions.md @@ -78,9 +78,6 @@ Evens: Odds: [9, 3, 1, 5, 7] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Map examples: @@ -122,9 +119,6 @@ Values with last digit >= 5: "apple": 199, "bottle": 107 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## apply @@ -175,9 +169,6 @@ Cubes: Sorted cubes: [1, 8, 27, 64, 125, 216, 343, 512, 729, 1000] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -237,9 +228,6 @@ Same, with upcased keys: "DALE": 2197, "EMBER": 6967871 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## reduce @@ -304,9 +292,6 @@ Product of values: Concatenation of values: 2,9,10,3,1,4,5,8,7,6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -381,9 +366,6 @@ String-join of values: { "joined": "823,13,199,191,107" } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## fold @@ -427,9 +409,6 @@ Sum with fold and 0 initial value: Sum with fold and 1000000 initial value: 1000055 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -486,9 +465,6 @@ Sum of values with fold and 1000000 initial value: { "sum": 1001333 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## sort @@ -543,9 +519,6 @@ Ascending: Descending: [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Map examples: @@ -637,9 +610,6 @@ Descending by value: "bottle": 107, "dale": 13 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Please see the [sorting page](sorting.md) for more examples. @@ -663,9 +633,6 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -678,9 +645,6 @@ red circle true 3 16 13.8103 2.9010 red square false 4 48 77.5542 7.4670 red square false 6 64 77.1991 9.5310 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -691,9 +655,6 @@ color shape flag k index quantity rate red square true 2 15 79.2778 0.0130 red square false 4 48 77.5542 7.4670 red square false 6 64 77.1991 9.5310 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -711,9 +672,6 @@ purple triangle false 7 65 80.1405 5.8240 false yellow circle true 8 73 63.9785 4.2370 false yellow circle true 9 87 63.5058 8.3350 false purple square false 10 91 72.3735 8.2430 false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -724,9 +682,6 @@ color shape flag k index quantity rate red circle true 3 16 13.8103 2.9010 purple triangle false 5 51 81.2290 8.5910 red square false 6 64 77.1991 9.5310 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThis last example could also be done using a map: @@ -744,9 +699,6 @@ color shape flag k index quantity rate red circle true 3 16 13.8103 2.9010 purple triangle false 5 51 81.2290 8.5910 red square false 6 64 77.1991 9.5310 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Combined examples @@ -770,9 +722,6 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -821,9 +770,6 @@ Sorted, then cubed: Sorted, then cubed, then summed: 2589905 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Caveats @@ -846,9 +792,6 @@ instead of
[3, 4, 5] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream### No IIFEs @@ -888,9 +831,6 @@ but this does:
2187 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream### Built-in functions currently unsupported as arguments @@ -931,7 +871,4 @@ but this does:
[1, 0.9238795325112867, 0.7071067811865476, 0.38268343236508984] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/reference-dsl-operators.md b/docs/src/reference-dsl-operators.md index ea17d52385..ba14512e76 100644 --- a/docs/src/reference-dsl-operators.md +++ b/docs/src/reference-dsl-operators.md @@ -111,9 +111,6 @@ bar.baz bar.baz [ ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream This also works on the left-hand sides of assignment statements: @@ -147,9 +144,6 @@ This also works on the left-hand sides of assignment statements: } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream A few caveats: @@ -165,9 +159,6 @@ A few caveats: 6989 [ ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream * However (awkwardly), if you want to use `.` for map-traversal as well as string-concatenation in the same statement, you'll need to insert parentheses, as the default associativity is left-to-right: @@ -181,9 +172,6 @@ go tool pprof -http=:8080 foo-stream (error) [ ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -195,7 +183,4 @@ go tool pprof -http=:8080 foo-stream GET -- api/check [ ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/reference-dsl-output-statements.md b/docs/src/reference-dsl-output-statements.md index 845bc3949c..0984b1fd5e 100644 --- a/docs/src/reference-dsl-output-statements.md +++ b/docs/src/reference-dsl-output-statements.md @@ -102,19 +102,11 @@ purple,triangle,false,7,65,80.1405,5.8240 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -
+And if you want indexing, redirects, etc., just assign to a temporary variable and use one of the other emit variants: @@ -426,9 +406,6 @@ id color shape flag k index quantity rate 8 yellow circle true 8 73 63.9785 4.2370 9 yellow circle true 9 87 63.5058 8.3350 10 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Emitf statements @@ -445,9 +422,6 @@ Use **emitf** to output several out-of-stream variables side-by-side in the samemlr --csv --from example.csv put -q 'tee > $shape.".csv", $*'--Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -mlr --csv cat circle.csv @@ -124,9 +116,6 @@ color,shape,flag,k,index,quantity,rate red,circle,true,3,16,13.8103,2.9010 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -138,9 +127,6 @@ red,square,true,2,15,79.2778,0.0130 red,square,false,4,48,77.5542,7.4670 red,square,false,6,64,77.1991,9.5310 purple,square,false,10,91,72.3735,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -151,9 +137,6 @@ color,shape,flag,k,index,quantity,rate yellow,triangle,true,1,11,43.6498,9.8870 purple,triangle,false,5,51,81.2290,8.5910 purple,triangle,false,7,65,80.1405,5.8240 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSee also [Redirected-output statements](reference-dsl-output-statements.md#redirected-output-statements) for examples. @@ -401,9 +384,6 @@ id color shape flag k index quantity rate 8 yellow circle true 8 73 63.9785 4.2370 9 yellow circle true 9 87 63.5058 8.3350 10 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
count=5,x_sum=2.26476,y_sum=2.585083 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Emit statements @@ -472,9 +446,6 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624 { "sum": 2.26476 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -482,9 +453,6 @@ go tool pprof -http=:8080 foo-stream
sum=2.26476 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIf it's indexed then use as many names after `emit` as there are indices: @@ -500,9 +468,6 @@ If it's indexed then use as many names after `emit` as there are indices: "wye": 0.777891 } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -512,9 +477,6 @@ go tool pprof -http=:8080 foo-stream a=pan,sum=0.346791 a=eks,sum=1.140078 a=wye,sum=0.777891 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -536,9 +498,6 @@ go tool pprof -http=:8080 foo-stream } } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -550,9 +509,6 @@ a=eks,b=pan,sum=0.758679 a=eks,b=wye,sum=0.381399 a=wye,b=wye,sum=0.204603 a=wye,b=pan,sum=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -584,9 +540,6 @@ go tool pprof -http=:8080 foo-stream } } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -601,9 +554,6 @@ a=eks,b=pan,i=2,sum=0.758679 a=eks,b=wye,i=4,sum=0.381399 a=wye,b=wye,i=3,sum=0.204603 a=wye,b=pan,i=5,sum=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNow for **emitp**: if you have as many names following `emit` as there are levels in the out-of-stream variable's map, then `emit` and `emitp` do the same thing. Where they differ is when you don't specify as many names as there are map levels. In this case, Miller needs to flatten multiple map indices down to output-record keys: `emitp` includes full prefixing (hence the `p` in `emitp`) while `emit` takes the deepest map key as the output-record key: @@ -627,9 +577,6 @@ Now for **emitp**: if you have as many names following `emit` as there are level } } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -639,9 +586,6 @@ go tool pprof -http=:8080 foo-stream a=pan,pan=0.346791 a=eks,pan=0.758679,wye=0.381399 a=wye,wye=0.204603,pan=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -651,9 +595,6 @@ go tool pprof -http=:8080 foo-stream pan=0.346791 pan=0.758679,wye=0.381399 wye=0.204603,pan=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -663,9 +604,6 @@ go tool pprof -http=:8080 foo-stream a=pan,sum.pan=0.346791 a=eks,sum.pan=0.758679,sum.wye=0.381399 a=wye,sum.wye=0.204603,sum.pan=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -673,9 +611,6 @@ go tool pprof -http=:8080 foo-stream
sum.pan.pan=0.346791,sum.eks.pan=0.758679,sum.eks.wye=0.381399,sum.wye.wye=0.204603,sum.wye.pan=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -687,9 +622,6 @@ sum.eks.pan 0.758679 sum.eks.wye 0.381399 sum.wye.wye 0.204603 sum.wye.pan 0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamUse **--flatsep** to specify the character which joins multilevel @@ -702,9 +634,6 @@ keys for `emitp` (it defaults to a colon): a=pan,sum/pan=0.346791 a=eks,sum/pan=0.758679,sum/wye=0.381399 a=wye,sum/wye=0.204603,sum/pan=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -712,9 +641,6 @@ go tool pprof -http=:8080 foo-stream
sum/pan/pan=0.346791,sum/eks/pan=0.758679,sum/eks/wye=0.381399,sum/wye/wye=0.204603,sum/wye/pan=0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -729,9 +655,6 @@ sum/eks/pan 0.758679 sum/eks/wye 0.381399 sum/wye/wye 0.204603 sum/wye/pan 0.573288 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Multi-emit statements @@ -778,9 +701,6 @@ hat zee 196.3494502965293 385 0.5099985721987774 hat eks 189.0067933716193 389 0.48587864619953547 hat hat 182.8535323148762 381 0.47993053101017374 hat pan 168.5538067327806 363 0.4643355557376876 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream What this does is walk through the first out-of-stream variable (`@x_sum` in this example) as usual, then for each keylist found (e.g. `pan,wye`), include the values for the remaining out-of-stream variables (here, `@x_count` and `@x_mean`). You should use this when all out-of-stream variables in the emit statement have **the same shape and the same keylists**. @@ -803,9 +723,6 @@ eks pan 0.758679 1 eks wye 0.381399 1 wye wye 0.204603 1 wye pan 0.573288 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -829,9 +746,6 @@ eks pan 1 eks wye 1 wye wye 1 wye pan 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -848,7 +762,4 @@ eks pan 0.758679 1 eks wye 0.381399 1 wye wye 0.204603 1 wye pan 0.573288 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/reference-dsl-syntax.md b/docs/src/reference-dsl-syntax.md index 970906b0ce..33ed3f4d7c 100644 --- a/docs/src/reference-dsl-syntax.md +++ b/docs/src/reference-dsl-syntax.md @@ -35,9 +35,6 @@ i j k 7 8 15 8 9 17 9 10 19 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Newlines within the expression are ignored, which can help increase legibility of complex expressions: @@ -63,9 +60,6 @@ wye eks 10000 0.734806020620654365 0.884788571337605134 5 7 2 2 data/s pan wye 10001 0.870530722602517626 0.009854780514656930 5 8 3 2 data/small2 hat wye 10002 0.321507044286237609 0.568893318795083758 5 9 4 2 data/small2 pan zee 10003 0.272054845593895200 0.425789896597056627 5 10 5 2 data/small2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -76,9 +70,6 @@ go tool pprof -http=:8080 foo-streamx_y_corr -0.7479940285189345 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Expressions from files @@ -94,9 +85,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -108,9 +96,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamYou may, though, find it convenient to put expressions into files for reuse, and read them @@ -135,9 +120,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream If you have some of the logic in a file and you want to write the rest on the command line, you can **use the -f and -e options together**: @@ -160,9 +142,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream A suggested use-case here is defining functions in files, and calling them from command-line expressions. @@ -189,9 +168,6 @@ Semicolons are optional after closing curly braces (which close conditionals and
x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -199,9 +175,6 @@ go tool pprof -http=:8080 foo-stream
x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSemicolons are required between statements even if those statements are on separate lines. **Newlines** are for your convenience but have no syntactic meaning: line endings do not terminate statements. For example, adjacent assignment statements must be separated by semicolons even if those statements are on separate lines: @@ -243,9 +216,6 @@ mlr put ' s,t,u,v 3,-1,5,1 9,-1,41,2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Bodies for all compound statements must be enclosed in **curly braces**, even if the body is a single statement: diff --git a/docs/src/reference-dsl-time.md b/docs/src/reference-dsl-time.md index 7c6ba69c9b..cd6210ec55 100644 --- a/docs/src/reference-dsl-time.md +++ b/docs/src/reference-dsl-time.md @@ -56,9 +56,6 @@ treating epoch-milliseconds as epoch-seconds.
2017-07-14T02:40:00Z 49503-02-10T02:40:00Z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamYou can get the current system time, as epoch-seconds, using the @@ -116,9 +113,6 @@ We also have [sec2gmtdate](reference-dsl-builtin-functions.md#sec2gmtdate) DSL f 1970-01-01 2009-02-13 1930-11-18 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Local times with standard format; specifying timezones @@ -151,9 +145,6 @@ mlr : unknown time zone This/Is/A/Typo
1970-01-01 02:00:00 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -161,9 +152,6 @@ go tool pprof -http=:8080 foo-stream
1969-12-31 21:00:00 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -187,9 +175,6 @@ go tool pprof -http=:8080 foo-stream 1969-12-31 21:00:00 1969-12-31 946789445 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -211,9 +196,6 @@ go tool pprof -http=:8080 foo-stream 1969-12-31 21:00:00 1969-12-31 946789445 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that for local times, Miller omits the `T` and the `Z` you see in GMT times. @@ -232,9 +214,6 @@ We also have the
1970-01-01 02:00:00 1969-12-31T22:00:00Z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -250,9 +229,6 @@ go tool pprof -http=:8080 foo-stream 1970-01-01 02:00:00 1970-01-01T03:00:00Z 1969-12-31T22:00:00Z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Custom formats: strptime and strftime @@ -346,9 +322,6 @@ Examples:
1970-01-01T00:00:00Z 1970-01-01T00:00:00Z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -367,9 +340,6 @@ go tool pprof -http=:8080 foo-stream 1970-01-01 00:00:00 +0000 Thursday, January 1, 1970 09:33 PM -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamUnfortunately, names from `%A` and `%B` are only available in English, as an artifact of a design @@ -406,9 +376,6 @@ For historical reasons, Miller's `strftime` and `strptime` use different format 1970-01-02 10:17:36.789000 (error) 123456.789 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## strptime_local and strftime_local @@ -442,9 +409,6 @@ Wednesday, December 31, 1969 1970-01-01 08:00:00 +0800 Thursday, January 1, 1970 1582992000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -470,9 +434,6 @@ Wednesday, December 31, 1969 1970-01-01 08:00:00 +0800 Thursday, January 1, 1970 1582992000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Relative times diff --git a/docs/src/reference-dsl-unset-statements.md b/docs/src/reference-dsl-unset-statements.md index bc0852804c..d7ced4177d 100644 --- a/docs/src/reference-dsl-unset-statements.md +++ b/docs/src/reference-dsl-unset-statements.md @@ -38,9 +38,6 @@ b=pan,i=2,y=0.522151 b=wye,i=3,y=0.338318 b=wye,i=4,y=0.134188 b=pan,i=5,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream This can also be done, of course, using `mlr cut -x`. You can also clear out-of-stream or local variables, at the base name level, or at an indexed sublevel: @@ -65,9 +62,6 @@ This can also be done, of course, using `mlr cut -x`. You can also clear out-of- } } {} -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -100,9 +94,6 @@ go tool pprof -http=:8080 foo-stream } } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIf you use `unset all` (or `unset @*` which is synonymous), that will unset all out-of-stream variables which have been assigned up to that point. diff --git a/docs/src/reference-dsl-user-defined-functions.md b/docs/src/reference-dsl-user-defined-functions.md index 0a1ac5be6a..d2be5a1625 100644 --- a/docs/src/reference-dsl-user-defined-functions.md +++ b/docs/src/reference-dsl-user-defined-functions.md @@ -45,9 +45,6 @@ eks pan 2 0.758679 0.522151 3.6808304227112796 2 wye wye 3 0.204603 0.338318 1.7412477437471126 6 eks wye 4 0.381399 0.134188 18.588317372151177 24 wye pan 5 0.573288 0.863624 211.38663947090302 120 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Properties of user-defined functions: @@ -102,9 +99,6 @@ NR=4 numcalls=10 NR=5 numcalls=15 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Properties of user-defined subroutines: @@ -185,9 +179,6 @@ purple triangle false 7 65 80.1405 5.8240 purple:triangle yellow circle true 8 73 63.9785 4.2370 yellow:circle yellow circle true 9 87 63.5058 8.3350 yellow:circle purple square false 10 91 72.3735 8.2430 purple:square -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -214,9 +205,6 @@ purple triangle false 7 65 80.1405 5.8240 purple:triangle above yellow circle true 8 73 63.9785 4.2370 yellow:circle above yellow circle true 9 87 63.5058 8.3350 yellow:circle above purple square false 10 91 72.3735 8.2430 purple:square above -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that you need a semicolon after the closing curly brace of the function literal. @@ -250,9 +238,6 @@ purple triangle false 7 65 80.1405 5.8240 purple:triangle above yellow circle true 8 73 63.9785 4.2370 yellow:circle above yellow circle true 9 87 63.5058 8.3350 yellow:circle above purple square false 10 91 72.3735 8.2430 purple:square above -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream See the [page on higher-order functions](reference-dsl-higher-order-functions.md) for more. diff --git a/docs/src/reference-dsl-variables.md b/docs/src/reference-dsl-variables.md index e2144864cb..40c63d8a27 100644 --- a/docs/src/reference-dsl-variables.md +++ b/docs/src/reference-dsl-variables.md @@ -36,23 +36,15 @@ If field names have **special characters** such as `.` then you can use braces, You may also use a **computed field name** in square brackets, e.g. -
+echo a=3,b=4 | mlr filter '$["x"] < 0.5'--Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream -echo s=green,t=blue,a=3,b=4 | mlr put '$[$s."_".$t] = $a * $b's=green,t=blue,a=3,b=4,green_blue=12 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNotes: @@ -82,9 +74,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -96,9 +85,6 @@ a=eks,b=pan,NEW=2,x=0.758679,y=0.522151 a=wye,b=wye,NEW=3,x=0.204603,y=0.338318 a=eks,b=wye,NEW=4,x=0.381399,y=0.134188 a=wye,b=pan,NEW=5,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -110,9 +96,6 @@ a=eks,b=pan,i=NEW,x=0.758679,y=0.522151 a=wye,b=wye,i=NEW,x=0.204603,y=0.338318 a=eks,b=wye,i=NEW,x=0.381399,y=0.134188 a=wye,b=pan,i=NEW,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -124,9 +107,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,NEW=b a=wye,b=wye,i=3,x=0.204603,y=0.338318,NEW=i a=eks,b=wye,i=4,x=0.381399,y=0.134188,NEW=x a=wye,b=pan,i=5,x=0.573288,y=0.863624,NEW=y -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -138,9 +118,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,NEW=pan a=wye,b=wye,i=3,x=0.204603,y=0.338318,NEW=3 a=eks,b=wye,i=4,x=0.381399,y=0.134188,NEW=0.381399 a=wye,b=pan,i=5,x=0.573288,y=0.863624,NEW=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -152,9 +129,6 @@ a=eks,b=NEW,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=NEW,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=NEW,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=NEW -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamRight-hand side accesses to non-existent fields -- i.e. with index less than 1 or greater than `NF` -- return an absent value. Likewise, left-hand side accesses only refer to fields which already exist. For example, if a field has 5 records then assigning the name or value of the 6th (or 600th) field results in a no-op. @@ -168,9 +142,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -182,9 +153,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Out-of-stream variables @@ -202,9 +170,6 @@ You may use a **computed key** in square brackets, e.g.
green_blue=12 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamOut-of-stream variables are **scoped** to the `put` command in which they appear. In particular, if you have two or more `put` commands separated by `then`, each put will have its own set of out-of-stream variables: @@ -227,9 +192,6 @@ a=10,b=2,c=3 a=40,b=5,c=6 sum=5 sum=50 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Out-of-stream variables' **extent** is from the start to the end of the record stream, i.e. every time the `put` or `filter` statement referring to them is executed. @@ -257,9 +219,6 @@ a=wye,x_count=2 a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -269,9 +228,6 @@ go tool pprof -http=:8080 foo-stream a=pan,x_count=1,x_sum=0.346791 a=eks,x_count=2,x_sum=1.140078 a=wye,x_count=2,x_sum=0.777891 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIndices can be arbitrarily deep -- here there are two or more of them: @@ -311,9 +267,6 @@ a=hat,b=zee,x_count=385,x_sum=196.3494502965293 a=hat,b=eks,x_count=389,x_sum=189.0067933716193 a=hat,b=hat,x_count=381,x_sum=182.8535323148762 a=hat,b=pan,x_count=363,x_sum=168.5538067327806 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The idea is that `stats1`, and other Miller verbs, encapsulate frequently-used patterns with a minimum of keystroking (and run a little faster), whereas using out-of-stream variables you have more flexibility and control in what you do. @@ -343,9 +296,6 @@ x=1,y=0,z=0 x=2,y=0.3010299956639812,z=0.5486620049392715 x=3,y=0.4771212547196624,z=0.6907396432228734 num_total=5,num_positive=3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Local variables @@ -383,9 +333,6 @@ i=7,o=13.966128063060479 i=8,o=13.99248245928659 i=9,o=15.784270485515197 i=10,o=15.37686787628025 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Things which are completely unsurprising, resembling many other languages: @@ -477,9 +424,6 @@ inner_d 70 outer_a 10 outer_b 50 outer_c 60 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream And this example demonstrates the type-declaration rules: @@ -550,9 +494,6 @@ a i y 3 wye 3.3831800000000003 4 eks 1.34188 5 wye 8.636239999999999 -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream Likewise, you can assign map literals to out-of-stream variables or local variables; pass them as arguments to user-defined functions, return them from functions, and so on: @@ -572,9 +513,6 @@ a=eks,x=151.7358 a=wye,x=40.9206 a=eks,x=76.2798 a=wye,x=114.6576 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Like out-of-stream and local variables, map literals can be multi-level: @@ -608,9 +546,6 @@ Like out-of-stream and local variables, map literals can be multi-level: "non-numeric": 10 } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream See also the [Maps page](reference-main-maps.md). @@ -638,9 +573,6 @@ read/write access to environment variables, e.g. `ENV["HOME"]` or a=eks,b=pan,i=2,x=0.758679,y=0.522151 1=pan,2=pan,3=1,4=0.3467901443380824,5=0.7268028627434533 a=wye,b=eks,i=10000,x=0.734806020620654365,y=0.884788571337605134 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -663,9 +595,6 @@ a=wye,b=eks,i=10000,x=0.734806020620654365,y=0.884788571337605134,fnr=2 a=pan,b=wye,i=10001,x=0.870530722602517626,y=0.009854780514656930,fnr=3 a=hat,b=wye,i=10002,x=0.321507044286237609,y=0.568893318795083758,fnr=4 a=pan,b=zee,i=10003,x=0.272054845593895200,y=0.425789896597056627,fnr=5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamTheir values of `NF`, `NR`, `FNR`, `FILENUM`, and `FILENAME` change from one @@ -684,9 +613,6 @@ Their **scope is global**: you can refer to them in any `filter` or `put` statem a,b,c,nr 1,2,3,1 4,5,6,2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -700,9 +626,6 @@ a,b,c,nr 4,5,6,2 4,5,6,2 4,5,6,2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe **extent** is for the duration of the put/filter: in a `begin` statement (which executes before the first input record is consumed) you will find `NR=1` and in an `end` statement (which is executed after the last input record is consumed) you will find `NR` to be the total number of records ingested. @@ -916,9 +839,6 @@ Example recursive copy of out-of-stream variables: "count": 5 } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Example of out-of-stream variable assigned to full stream record, where the 2nd record is stashed, and the 4th record is overwritten with that: @@ -932,9 +852,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=pan,i=5,x=0.573288,y=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Example of full stream record assigned to an out-of-stream variable, finding the record for which the `x` field has the largest value in the input stream: @@ -959,9 +876,6 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624
a b i x y eks pan 2 0.758679 0.522151 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Keywords for filter and put diff --git a/docs/src/reference-dsl.md b/docs/src/reference-dsl.md index f9cd470320..46651921a0 100644 --- a/docs/src/reference-dsl.md +++ b/docs/src/reference-dsl.md @@ -39,9 +39,6 @@ Example: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream * Verbs are coded in Go @@ -59,9 +56,6 @@ Example: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream * You get to write your own DSL expressions @@ -126,9 +120,6 @@ apple,37,1 ball,28,2 cat,54,3 end -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The `print` statements for `begin` and `end` went out before the first record @@ -168,9 +159,6 @@ you might retain only the records whose `a` field has value `eks`:
a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=eks,b=wye,i=4,x=0.381399,y=0.134188 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamor you might add a new field which is a function of existing fields: @@ -184,9 +172,6 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,ab=eks_pan a=wye,b=wye,i=3,x=0.204603,y=0.338318,ab=wye_wye a=eks,b=wye,i=4,x=0.381399,y=0.134188,ab=eks_wye a=wye,b=pan,i=5,x=0.573288,y=0.863624,ab=wye_pan -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Differences between put and filter @@ -221,9 +206,6 @@ purple triangle false 5 51 81.2290 8.5910 high rate red square false 6 64 77.1991 9.5310 high rate purple triangle false 7 65 80.1405 5.8240 low rate purple square false 10 91 72.3735 8.2430 high rate -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -245,9 +227,6 @@ red square false 6 64 77.1991 9.5310 squ are yellow circle true 8 73 63.9785 4.2370 cir cle yellow circle true 9 87 63.5058 8.3350 cir cle purple square false 10 91 72.3735 8.2430 squ are -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/reference-main-arrays.md b/docs/src/reference-main-arrays.md index 840b75a787..abdc3bb63b 100644 --- a/docs/src/reference-main-arrays.md +++ b/docs/src/reference-main-arrays.md @@ -46,9 +46,6 @@ Array literals are written in square brackets braces with integer indices. Array 99, true ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream As with maps and argument-lists, trailing commas are supported: @@ -67,9 +64,6 @@ As with maps and argument-lists, trailing commas are supported:
["a", "b", "c"] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamAlso note that several [built-in functions](reference-dsl-builtin-functions.md) operate on arrays and/or return arrays. @@ -114,9 +108,6 @@ while positive indices read forward from the start. If an array has length `n` t 50 [10, 20] [40, 50] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Slicing @@ -144,9 +135,6 @@ x[4], x[5]]`. [30, 40, 50] [10, 20, 30, 40, 50] [20, 30, 40] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Out-of-bounds indexing @@ -169,9 +157,6 @@ behavior intentionally imitates Python.) 10 50 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -188,9 +173,6 @@ go tool pprof -http=:8080 foo-stream [10, 20] [10, 20, 30, 40, 50] [] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Auto-create results in maps @@ -215,9 +197,6 @@ as-yet-assigned local variable or out-of-stream variable results in "square": 8.2430, "circle": 8.3350 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream *This also means that auto-create results in maps, not arrays, even if keys are integers.* @@ -245,9 +224,6 @@ If you want to auto-extend an [array](reference-main-arrays.md), initialize it e "4": 7.4670 } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Auto-extend and null-gaps @@ -286,9 +262,6 @@ are called **null-gaps**.
["a", "b"] ["a", null, null, null, "e"] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Unset as shift @@ -308,9 +281,6 @@ Unsetting an array index results in shifting all higher-index elements down by o
["a", "b", "c", "d", "e"] ["a", "c", "d", "e"] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamMore generally, you can get shift and pop operations by unsetting indices 1 and -1: diff --git a/docs/src/reference-main-compressed-data.md b/docs/src/reference-main-compressed-data.md index f188cd9424..a54ed8026b 100644 --- a/docs/src/reference-main-compressed-data.md +++ b/docs/src/reference-main-compressed-data.md @@ -46,9 +46,6 @@ red,square,false,6,64,77.1991,9.5310 yellow,triangle,true,1,11,43.6498,9.8870 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream This will decompress the input data on the fly, while leaving the disk file unmodified. This helps you save disk space, at the cost of some additional runtime CPU usage to decompress the data. @@ -84,9 +81,6 @@ red,square,false,6,64,77.1991,9.5310 yellow,triangle,true,1,11,43.6498,9.8870 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The benefit of `--prepipe` is that Miller will run the specified program once per diff --git a/docs/src/reference-main-data-types.md b/docs/src/reference-main-data-types.md index 280b1c63bd..7505acb814 100644 --- a/docs/src/reference-main-data-types.md +++ b/docs/src/reference-main-data-types.md @@ -76,9 +76,6 @@ Examples: a,b,c 1.2,3,true 4,5.6,buongiorno -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -126,9 +123,6 @@ f 8.9 tf float g 15.9 tg float -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamOn input, string values representable as boolean (e.g. `"true"`, `"false"`) @@ -159,9 +153,6 @@ or the id,blob 100,"{""a"":1,""b"":[2,3,4]}" 105,"{""a"":6,""b"":[7,8,9]}" -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -178,9 +169,6 @@ go tool pprof -http=:8080 foo-stream "blob": "{\"a\":6,\"b\":[7,8,9]}" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -203,9 +191,6 @@ go tool pprof -http=:8080 foo-stream } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -228,9 +213,6 @@ go tool pprof -http=:8080 foo-stream } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThese have their respective operations to convert back to string: the diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 5a89323a27..a71e0aa8ea 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -33,9 +33,6 @@ Here are flags you can use when invoking Miller. For example, when you type "rate": 9.8870 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream the `--icsv` and `--ojson` bits are _flags_. See the [Miller command @@ -376,7 +373,6 @@ These are flags for profiling Miller performance. **Flags:** * `--cpuprofile {CPU-profile file name}`: Create a CPU-profile file for performance analysis. Instructions will be printed to stderr. This flag must be the very first thing after 'mlr' on the command line. -* `--memprofile {Memory-profile file name}`: Create a memory-profile file for performance analysis. Instructions will be printed to stderr. This flag must be the very first thing after 'mlr' on the command line. * `--time`: Print elapsed execution time in seconds to stderr at the end of the execution of the program. * `--traceprofile`: Create a trace-profile file for performance analysis. Instructions will be printed to stderr. This flag must be the very first thing after 'mlr' on the command line. diff --git a/docs/src/reference-main-maps.md b/docs/src/reference-main-maps.md index 53f0f2b3c9..4d8942d8d4 100644 --- a/docs/src/reference-main-maps.md +++ b/docs/src/reference-main-maps.md @@ -48,9 +48,6 @@ _Map literals_ are written in curly braces with string keys any [Miller data typ } true true -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream As with arrays and argument-lists, trailing commas are supported: @@ -73,9 +70,6 @@ As with arrays and argument-lists, trailing commas are supported: "b": 2, "c": 3 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The current record, accessible using `$*`, is a map. @@ -107,9 +101,6 @@ Color is yellow "rate": 0.0130 } Color is red -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The collection of all [out-of-stream variables](reference-dsl-variables.md#out-of-stream0variables), `@*`, is a map. @@ -135,9 +126,6 @@ The collection of all [out-of-stream variables](reference-dsl-variables.md#out-o }, "last_color": "purple" } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Also note that several [built-in functions](reference-dsl-builtin-functions.md) operate on maps and/or return maps. @@ -177,9 +165,6 @@ in **auto-create** of that variable as a map variable: "square": 8.2430, "circle": 8.3350 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream *This also means that auto-create results in maps, not arrays, even if keys are integers.* @@ -207,9 +192,6 @@ If you want to auto-extend an [array](reference-main-arrays.md), initialize it e "4": 7.4670 } } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Auto-deepen @@ -235,9 +217,6 @@ red square 17.011 red circle 2.9010 purple triangle 14.415 purple square 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Looping diff --git a/docs/src/reference-main-null-data.md b/docs/src/reference-main-null-data.md index 32a4000811..6ddd2518f8 100644 --- a/docs/src/reference-main-null-data.md +++ b/docs/src/reference-main-null-data.md @@ -69,9 +69,6 @@ a=1,b=8 a=,b=4 x=9,b=10 a=5,b=7 -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream
@@ -83,9 +80,6 @@ a=3,b=2 a=5,b=7 a=,b=4 x=9,b=10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -97,9 +91,6 @@ a=5,b=7 a=3,b=2 a=1,b=8 x=9,b=10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream* Functions/operators which have one or more *empty* arguments produce empty output: e.g. @@ -109,9 +100,6 @@ go tool pprof -http=:8080 foo-stream
x=2,y=3,a=5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -119,9 +107,6 @@ go tool pprof -http=:8080 foo-stream
x=,y=3,a= -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -129,9 +114,6 @@ go tool pprof -http=:8080 foo-stream
x=,y=3,a=,b=1.0986122886681096 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamwith the exception that the `min` and `max` functions are special: if one argument is non-null, it wins: @@ -141,9 +123,6 @@ with the exception that the `min` and `max` functions are special: if one argume
x=,y=3,a=3,b= -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream* Functions of *absent* variables (e.g. `mlr put '$y = log10($nonesuch)'`) evaluate to absent, and arithmetic/bitwise/boolean operators with both operands being absent evaluate to absent. Arithmetic operators with one absent operand return the other operand. More specifically, absent values act like zero for addition/subtraction, and one for multiplication: Furthermore, **any expression which evaluates to absent is not stored in the left-hand side of an assignment statement**: @@ -153,9 +132,6 @@ go tool pprof -http=:8080 foo-stream
x=2,y=3,b=3,c=5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -163,9 +139,6 @@ go tool pprof -http=:8080 foo-stream
x=2,y=3,a=2,b=3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream* Likewise, for assignment to maps, **absent-valued keys or values result in a skipped assignment**. @@ -193,9 +166,6 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -207,9 +177,6 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true,loadmillis=320 record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false,loadmillis=970 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -221,9 +188,6 @@ record_count=100,resource=/path/to/file,loadmillis=0 resource=/path/to/second/file,loadsec=0.32,ok=true,loadmillis=320 record_count=150,resource=/path/to/second/file,loadmillis=0 resource=/some/other/path,loadsec=0.97,ok=false,loadmillis=970 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Arithmetic rules diff --git a/docs/src/reference-main-number-formatting.md b/docs/src/reference-main-number-formatting.md index b2ef0d3819..627cb17484 100644 --- a/docs/src/reference-main-number-formatting.md +++ b/docs/src/reference-main-number-formatting.md @@ -35,9 +35,6 @@ pipe the output to something else, particularly CSV. I use Miller's pretty-print
x= 3.100,y= 4.300 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -45,9 +42,6 @@ go tool pprof -http=:8080 foo-stream
x=3.10000000e+00,y=4.30000000e+00 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## The format-values verb @@ -66,9 +60,6 @@ put`. For example:
x=3.1,y=4.3,z=13.330000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -76,9 +67,6 @@ go tool pprof -http=:8080 foo-stream
x=0xffff,y=0xff,z=00feff01 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamInput conversion from hexadecimal is done automatically on fields handled by `mlr put` and `mlr filter` as long as the field value begins with `0x`. To apply output conversion to hexadecimal on a single column, you may use `fmtnum`, or the keystroke-saving [`hexfmt`](reference-dsl-builtin-functions.md#hexfmt) function. Example: @@ -88,9 +76,6 @@ Input conversion from hexadecimal is done automatically on fields handled by `ml
x=0xffff,y=0xff,z=16711425 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -98,7 +83,4 @@ go tool pprof -http=:8080 foo-stream
x=0xffff,y=0xff,z=0xfeff01 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/reference-main-overview.md b/docs/src/reference-main-overview.md index 2f5b662202..cc9c3a0b3b 100644 --- a/docs/src/reference-main-overview.md +++ b/docs/src/reference-main-overview.md @@ -34,9 +34,6 @@ For example, reading from a file: color shape flag k index quantity rate red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -46,9 +43,6 @@ go tool pprof -http=:8080 foo-stream color shape flag k index quantity rate red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamReading from standard input: @@ -60,9 +54,6 @@ Reading from standard input: color shape flag k index quantity rate red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The rest of this reference section gives you full information on each of these parts of the command line. @@ -88,9 +79,6 @@ Example of using a verb for data processing: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream * Verbs are coded in Go @@ -108,9 +96,6 @@ Example of doing the same thing using a DSL expression: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream * You get to write your own expressions in Miller's programming language diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md index c4d2efca0d..f15b55f596 100644 --- a/docs/src/reference-main-regular-expressions.md +++ b/docs/src/reference-main-regular-expressions.md @@ -59,9 +59,6 @@ name=bull,regex=^b[ou]ll$
name=jane,regex=^j.*e$ name=bull,regex=^b[ou]ll$ -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Regex captures diff --git a/docs/src/reference-main-separators.md b/docs/src/reference-main-separators.md index 3f63a2f7f2..c13241e659 100644 --- a/docs/src/reference-main-separators.md +++ b/docs/src/reference-main-separators.md @@ -74,9 +74,6 @@ a=4,b=5,c=6
c:3;a:1;b:2 c:6;a:4;b:5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -86,9 +83,6 @@ go tool pprof -http=:8080 foo-stream color,shape,flag,k,index,quantity,rate yellow,triangle,true,1,11,43.6498,9.8870 red,square,true,2,15,79.2778,0.0130 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -98,9 +92,6 @@ go tool pprof -http=:8080 foo-stream color|shape|flag|k|index|quantity|rate yellow|triangle|true|1|11|43.6498|9.8870 red|square|true|2|15|79.2778|0.0130 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIf your data has non-default separators and you don't want to change those @@ -121,9 +112,6 @@ a:4;b:5;c:6
c:3;a:1;b:2 c:6;a:4;b:5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Multi-character separators @@ -138,9 +126,6 @@ restrictions), IRS must be `\n` and IFS must be a single character.
c:=3;;;a:=1;;;b:=2 c:=6;;;a:=4;;;b:=5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIf your data has field separators which are one or more consecutive spaces, you @@ -181,9 +166,6 @@ early light what so 2 light 3 what 4 so -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Regular-expression separators @@ -273,9 +255,6 @@ their values indicate what you specified at the command line -- so their use is
a:1;b:2;c:3;d:>>>,|||;<<< a:4;b:5;c:6;d:>>>,|||;<<< -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Which separators apply to which file formats diff --git a/docs/src/reference-main-strings.md b/docs/src/reference-main-strings.md index 0ca67ad338..df35284f43 100644 --- a/docs/src/reference-main-strings.md +++ b/docs/src/reference-main-strings.md @@ -41,9 +41,6 @@ purple triangle false 7 65 80.1405 5.8240 purple:triangle yellow circle true 8 73 63.9785 4.2370 yellow:circle yellow circle true 9 87 63.5058 8.3350 yellow:circle purple square false 10 91 72.3735 8.2430 purple:square -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Also see the [list of string-related built-in functions](reference-dsl-builtin-functions.md#string-functions). @@ -95,9 +92,6 @@ a e ab de -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Slicing @@ -124,9 +118,6 @@ ab cde abcde bcd -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Out-of-bounds indexing @@ -149,9 +140,6 @@ accesses result in trimming the indices, resulting in a short string or even the a e (error) -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -168,9 +156,6 @@ go tool pprof -http=:8080 foo-stream "ab" "abcde" "" -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Escape sequences for string literals diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 2a9b53349f..92e694d3dd 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -28,9 +28,6 @@ yellow triangle true 1 11 43.6498 9.8870 red square true 2 15 79.2778 0.0130 red circle true 3 16 13.8103 2.9010 red square false 4 48 77.5542 7.4670 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream the `sort` and `head` bits are _verbs_. See the [Miller command @@ -82,9 +79,6 @@ Options:
a=b,c=d,e=f -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -92,9 +86,6 @@ go tool pprof -http=:8080 foo-stream
a=b,c=d,e=f,4=g -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## bar @@ -133,9 +124,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -148,9 +136,6 @@ eks pan 2 ******************************.......... ********************......... wye wye 3 ********................................ *************........................... eks wye 4 ***************......................... *****................................... wye pan 5 **********************.................. **********************************...... -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -163,9 +148,6 @@ eks pan 2 ***************************************# ************************..... wye wye 3 #....................................... #....................................... eks wye 4 #....................................... #....................................... wye pan 5 **********************************...... ***************************************# -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -178,9 +160,6 @@ eks pan 2 [0.204603]*******************#[0.758679] [0.134188]**********......... wye wye 3 [0.204603]#...................[0.758679] [0.134188]*****...............[0.863624] eks wye 4 [0.204603]******..............[0.758679] [0.134188]#...................[0.863624] wye pan 5 [0.204603]*************.......[0.758679] [0.134188]*******************#[0.863624] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## bootstrap @@ -298,9 +277,6 @@ a,b,c 1,2,3 4,5,6 7,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -318,9 +294,6 @@ c 6 a 7 b 8 c 9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -331,9 +304,6 @@ n,a,b,c 1,1,2,3 2,4,5,6 3,7,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -346,9 +316,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -361,9 +328,6 @@ n a b i x y 1 wye wye 3 0.204603 0.338318 2 eks wye 4 0.381399 0.134188 2 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## check @@ -418,9 +382,6 @@ leave off -k as well as -v. " Preference ": " yellow" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -441,9 +402,6 @@ go tool pprof -http=:8080 foo-stream "Preference": " yellow" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -464,9 +422,6 @@ go tool pprof -http=:8080 foo-stream " Preference ": "yellow" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -487,9 +442,6 @@ go tool pprof -http=:8080 foo-stream "Preference": "yellow" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamFunction links: @@ -520,9 +472,6 @@ Options:
count=10000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -534,9 +483,6 @@ a=eks,count=1965 a=wye,count=1966 a=zee,count=2047 a=hat,count=1941 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -544,9 +490,6 @@ go tool pprof -http=:8080 foo-stream
count=5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -558,9 +501,6 @@ b=wye,count=2057 b=zee,count=1943 b=eks,count=2008 b=hat,count=2050 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -568,9 +508,6 @@ go tool pprof -http=:8080 foo-stream
count=5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -602,9 +539,6 @@ a=zee,b=hat,count=409 a=wye,b=zee,count=385 a=eks,b=hat,count=417 a=wye,b=eks,count=386 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## count-distinct @@ -658,9 +592,6 @@ a=wye,b=wye,count=377 a=eks,b=pan,count=371 a=hat,b=pan,count=363 a=eks,b=zee,count=357 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -677,9 +608,6 @@ field=b,value=wye,count=2057 field=b,value=zee,count=1943 field=b,value=eks,count=2008 field=b,value=hat,count=2050 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -711,9 +639,6 @@ a=wye,b=wye,someothername=377 a=eks,b=pan,someothername=371 a=hat,b=pan,someothername=363 a=eks,b=zee,someothername=357 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -721,9 +646,6 @@ go tool pprof -http=:8080 foo-stream
count=25 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## count-similar @@ -766,9 +688,6 @@ zee eks 17 0.29081949506712723 0.054478717073354166 hat zee 18 0.05727869223575699 0.13343527626645157 zee pan 19 0.43144132839222604 0.8442204830496998 eks wye 20 0.38245149780530685 0.4730652428100751 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -796,9 +715,6 @@ zee eks 17 0.29081949506712723 0.054478717073354166 5 zee pan 19 0.43144132839222604 0.8442204830496998 5 hat wye 9 0.03144187646093577 0.7495507603507059 2 hat zee 18 0.05727869223575699 0.13343527626645157 2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -826,9 +742,6 @@ zee wye 8 0.5985540091064224 0.976181385699006 5 zee pan 12 0.3676141320555616 0.23614420670296965 5 zee eks 17 0.29081949506712723 0.054478717073354166 5 zee pan 19 0.43144132839222604 0.8442204830496998 5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## cut @@ -867,9 +780,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -882,9 +792,6 @@ i x y 3 0.204603 0.338318 4 0.381399 0.134188 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -892,9 +799,6 @@ go tool pprof -http=:8080 foo-stream
a=1,b=2,c=3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -902,9 +806,6 @@ go tool pprof -http=:8080 foo-stream
b=2,c=3,a=1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## decimate @@ -963,9 +864,6 @@ a,b,c 1,,3 4,5,6 7,5,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -976,9 +874,6 @@ a,b,c 1,,3 4,5,6 7,,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## fill-empty @@ -1012,9 +907,6 @@ a,b,c 1,N/A,3 4,5,6 7,N/A,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1025,9 +917,6 @@ a,b,c 1,something,3 4,5,6 7,something,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## filter @@ -1187,9 +1076,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1202,9 +1088,6 @@ eks pan 2.000000 0.758679 0.522151 wye wye 3.000000 0.204603 0.338318 eks wye 4.000000 0.381399 0.134188 wye pan 5.000000 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1217,9 +1100,6 @@ XeksX XpanX 00000002 7.586790e-01 5.221510e-01 XwyeX XwyeX 00000003 2.046030e-01 3.383180e-01 XeksX XwyeX 00000004 3.813990e-01 1.341880e-01 XwyeX XpanX 00000005 5.732880e-01 8.636240e-01 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1232,9 +1112,6 @@ XeksX XpanX 2.000000e+00 7.586790e-01 5.221510e-01 XwyeX XwyeX 3.000000e+00 2.046030e-01 3.383180e-01 XeksX XwyeX 4.000000e+00 3.813990e-01 1.341880e-01 XwyeX XpanX 5.000000e+00 5.732880e-01 8.636240e-01 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## fraction @@ -1301,9 +1178,6 @@ male blue 2034 0.270083654229186 male purple 12 0.0015934138892577346 male yellow 1192 0.15827911299960165 male orange 448 0.0594874518656221 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Using `-g` we can split those out by gender, or by color: @@ -1325,9 +1199,6 @@ male blue 2034 0.5014792899408284 male purple 12 0.0029585798816568047 male yellow 1192 0.2938856015779093 male orange 448 0.11045364891518737 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1347,9 +1218,6 @@ male blue 2034 0.8578658793757908 male purple 12 0.025 male yellow 1192 0.9974895397489539 male orange 448 0.9634408602150538 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamWe can see, for example, that 70.9% of females have red (on the left) while 94.5% of reds are for females. @@ -1373,9 +1241,6 @@ male blue 2034 27.0083654229186 male purple 12 0.15934138892577346 male yellow 1192 15.827911299960165 male orange 448 5.94874518656221 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Another often-used idiom is to convert from a point distribution to a cumulative distribution, also known as "running sums". Here, you can use `-c`: @@ -1397,9 +1262,6 @@ male blue 2034 78.06400212455186 male purple 12 78.22334351347763 male yellow 1192 94.0512548134378 male orange 448 100 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1419,9 +1281,6 @@ male blue 2034 59.27021696252466 male purple 12 59.56607495069034 male yellow 1192 88.95463510848126 male orange 448 100 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## gap @@ -1489,9 +1348,6 @@ eks wye 4 0.381399 0.134188 pan pan 1 0.346791 0.726802 wye wye 3 0.204603 0.338318 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1504,9 +1360,6 @@ eks pan 2 0.758679 0.522151 eks wye 4 0.381399 0.134188 wye wye 3 0.204603 0.338318 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIn this example, since the sort is on field `a`, the first step is to group together all records having the same value for field `a`; the second step is to sort the distinct `a`-field values `pan`, `eks`, and `wye` into `eks`, `pan`, and `wye`; the third step is to print out the record-list for `a=eks`, then the record-list for `a=pan`, then the record-list for `a=wye`. The group-by operation omits the middle sort and just puts like records together, for those times when a sort isn't desired. In particular, the ordering of group-by fields for group-by is the order in which they were encountered in the data stream, which in some cases may be more interesting to you. @@ -1534,9 +1387,6 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1551,9 +1401,6 @@ resource loadsec ok record_count resource 100 /path/to/file 150 /path/to/second/file -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## having-fields @@ -1589,9 +1436,6 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1603,9 +1447,6 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1615,9 +1456,6 @@ go tool pprof -http=:8080 foo-stream resource=/path/to/file,loadsec=0.45,ok=true resource=/path/to/second/file,loadsec=0.32,ok=true resource=/some/other/path,loadsec=0.97,ok=false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## head @@ -1646,9 +1484,6 @@ pan pan 1 0.3467901443380824 0.7268028627434533 eks pan 2 0.7586799647899636 0.5221511083334797 wye wye 3 0.20460330576630303 0.33831852551664776 eks wye 4 0.38139939387114097 0.13418874328430463 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1661,9 +1496,6 @@ wye wye 3 0.20460330576630303 0.33831852551664776 eks zee 7 0.6117840605678454 0.1878849191181694 zee eks 17 0.29081949506712723 0.054478717073354166 wye hat 24 0.7286126830627567 0.19441962592638418 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## histogram @@ -1703,9 +1535,6 @@ bin_lo bin_hi x_count x2_count x3_count 0.7 0.8 1007 560 420 0.8 0.9 986 571 383 0.9 1 1013 507 341 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1725,9 +1554,6 @@ my_bin_lo my_bin_hi my_x_count my_x2_count my_x3_count 0.7 0.8 1007 560 420 0.8 0.9 986 571 383 0.9 1 1013 507 341 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## join @@ -1803,9 +1629,6 @@ id name 300 carol 400 david 500 edgar -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1833,9 +1656,6 @@ present 200 present 200 present 400 present 300 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1864,9 +1684,6 @@ id name status 200 bob present 400 david present 300 carol present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSame, but with sorting the input first: @@ -1897,9 +1714,6 @@ id name status 400 david present 400 david missing 400 david present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Same, but showing only unpaired records: @@ -1915,9 +1729,6 @@ missing 600 id name 500 edgar -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Use prefixing options to disambiguate between otherwise identical non-join field names: @@ -1931,9 +1742,6 @@ a b c 1 4 5 1 2 3 1 4 5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -1945,9 +1753,6 @@ a left_b left_c right_b right_c 1 4 5 2 3 1 2 3 4 5 1 4 5 4 5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamUse zero join columns: @@ -1961,9 +1766,6 @@ left_a left_b left_c right_a right_b right_c 1 4 5 1 2 3 1 2 3 1 4 5 1 4 5 1 4 5 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## json-parse @@ -2049,9 +1851,6 @@ John,23,present Fred,34,present Alice,56,missing Carol,45,present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2063,9 +1862,6 @@ John,23,present Fred,34,present Alice,56,missing Carol,45,present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2077,9 +1873,6 @@ John 23 present Fred 34 present Alice 56 missing Carol 45 present -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## latin1-to-utf8 @@ -2140,9 +1933,6 @@ shape count circle 2591 triangle 3372 square 4115 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2155,9 +1945,6 @@ triangle orange 107 square orange 128 circle green 287 circle purple 289 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2170,9 +1957,6 @@ triangle orange 107 square orange 128 circle green 287 circle purple 289 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2185,9 +1969,6 @@ triangle orange square orange circle green circle purple -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSee also [most-frequent](reference-verbs.md#most-frequent). @@ -2259,9 +2040,6 @@ a_in a_out b_in b_out 436 490 446 195 526 320 963 780 220 888 705 831 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2272,9 +2050,6 @@ a_min a_max a_sum b_min b_max b_sum 436 490 926 195 446 641 320 526 846 780 963 1743 220 888 1108 705 831 1536 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2285,9 +2060,6 @@ a_in a_out b_in b_out a_sum b_sum 436 490 446 195 926 641 526 320 963 780 846 1743 220 888 705 831 1108 1536 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## most-frequent @@ -2315,9 +2087,6 @@ shape count square 4115 triangle 3372 circle 2591 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2330,9 +2099,6 @@ triangle red 1560 circle red 1207 square blue 589 square yellow 589 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2345,9 +2111,6 @@ triangle red 1560 circle red 1207 square blue 589 square yellow 589 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2360,9 +2123,6 @@ triangle red circle red square blue square yellow -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSee also [least-frequent](reference-verbs.md#least-frequent). @@ -2571,12 +2331,6 @@ a,c,e 1,3,5 2,4,5 3,5,7 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-remove-empty-columns -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Since this verb needs to read all records to see if any of them has a non-empty value for a given field name, it is non-streaming: it will ingest all records before writing any. @@ -2620,9 +2374,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2635,9 +2386,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamAs discussed in [Performance](performance.md), `sed` is significantly faster than Miller at doing this. However, Miller is format-aware, so it knows to do renames only within specified field keys and not any others, nor in field values which may happen to contain the same pattern. Example: @@ -2662,9 +2410,6 @@ a=eks,b=pan,i=2,x=0.758679,COLUMN5=0.522151 a=wye,b=wye,i=3,x=0.204603,COLUMN5=0.338318 a=eks,b=wye,i=4,x=0.381399,COLUMN5=0.134188 a=wye,b=pan,i=5,x=0.573288,COLUMN5=0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream See also [label](reference-verbs.md#label). @@ -2707,9 +2452,6 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2722,9 +2464,6 @@ i b a x y 3 wye wye 0.204603 0.338318 4 wye eks 0.381399 0.134188 5 pan wye 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -2737,9 +2476,6 @@ eks 0.758679 0.522151 2 pan wye 0.204603 0.338318 3 wye eks 0.381399 0.134188 4 wye wye 0.573288 0.863624 5 pan -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## repeat @@ -2804,9 +2540,6 @@ color=red color=green color=green color=green -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream After expansion with `repeat`, such data can then be sent on to @@ -3022,9 +2755,6 @@ i=7 i=8 i=9 i=10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3037,9 +2767,6 @@ i=28 i=32 i=36 i=40 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3052,9 +2779,6 @@ i=32 i=28 i=24 i=20 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## shuffle @@ -3102,9 +2826,6 @@ a,b,c 1,2,3 4,,6 ,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## sort @@ -3150,9 +2871,6 @@ eks wye 4 0.381399 0.134188 pan pan 1 0.346791 0.726802 wye pan 5 0.573288 0.863624 wye wye 3 0.204603 0.338318 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Here's an example filtering log data: suppose multiple threads (labeled here by color) are all logging progress counts to a single log file. The log file is (by nature) chronological, so the progress of various threads is interleaved: @@ -3202,9 +2920,6 @@ upsec color count 1.587 red 3782 1.601 red 3755 1.832 red 3717 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Any records not having all specified sort keys will appear at the end of the output, in the order they @@ -3218,9 +2933,6 @@ x=1 x=2 x=4 a=3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3231,9 +2943,6 @@ x=4 x=2 x=1 a=3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## sort-within-records @@ -3282,9 +2991,6 @@ b a c c b a 7 8 9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3308,9 +3014,6 @@ go tool pprof -http=:8080 foo-stream "c": 7 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3321,9 +3024,6 @@ a b c 1 2 3 5 4 6 9 8 7 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## split @@ -3465,9 +3165,6 @@ y_p50 0.5060212582772865 y_mean 0.5062057444929905 y_p90 0.9053657573378745 y_max 0.9999648102177897 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3480,9 +3177,6 @@ hat 0.4878988625336502 0.5131176341556505 pan 0.4973036405471583 0.49959885012092725 wye 0.4975928392133964 0.5045964890907357 zee 0.5042419022900586 0.5029967546798116 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3498,9 +3192,6 @@ purple 0.501319 0.988893 0.504571 0.988287 1.9725823278192132 1.9586678584381585 green 0.502015 0.990764 0.505359 0.990175 1.9735744947860123 1.9593496900223406 blue 0.525226 0.992655 0.485170 0.993873 1.8899578467174132 2.048504647855391 orange 0.483548 0.993635 0.480913 0.989102 2.054883899840347 2.056717119312641 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3511,9 +3202,6 @@ shape count square 4115 triangle 3372 circle 2591 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3524,9 +3212,6 @@ shape color_mode triangle red square red circle red -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## stats2 @@ -3580,9 +3265,6 @@ x2_xy_cov 0.04188382281779374 x2_xy_corr 0.630174342037994 x2_y2_cov -0.00030953725962542085 x2_y2_corr -0.0034249088761121966 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3597,9 +3279,6 @@ eks 0.0407804923685586 0.48140207967651016 1965 0.0016461239223448587 wye -0.03915349075204814 0.5255096523974456 1966 0.0015051268704373607 1 0 1966 1 0.8538317334220835 0.1267454301662969 1966 0.38991721818599295 zee 0.0027812364960399147 0.5043070448033061 2047 0.000007751652858786137 1 0 2047 1 0.8524439912011013 0.12401684308018937 2047 0.39356598090006495 hat -0.018620577041095078 0.5179005397264935 1941 0.0003520036646055585 1 0 1941 1 0.8412305086345014 0.13557328318623216 1941 0.3687944261732265 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamHere's an example simple line-fit. The `x` and `y` @@ -3690,9 +3369,6 @@ upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 donesec 25.10852919630297 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## step @@ -3905,12 +3581,6 @@ b string 10000 0 5 - eks zee i int 10000 0 10000 5000.500 1 10000 x float 10000 0 10000 0.499 0.000 1.000 y float 10000 0 10000 0.506 0.000 1.000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-summary -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3940,12 +3610,6 @@ lof - - -12499 -1.2578765057782637 -1.283461 lif - - -4999 -0.5056030637729731 -0.5156625043990937 uif - - 10001 0.9989438202376082 1.0199359148794074 uof - - 17501 1.751217262242899 1.787735124518658 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-summary -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3956,12 +3620,6 @@ field_name a b i x y mode pan wye 1 0.3467901443380824 0.7268028627434533 mean - - 5000.5 0.49860196816795804 0.5062057444929905 median pan pan 5001 0.5011592202840128 0.5060212582772865 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-summary -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## tac @@ -3985,9 +3643,6 @@ Prints the records in the input stream in reverse order. Note: this requires Mil a b c 1 2 3 4 5 6 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -3996,9 +3651,6 @@ go tool pprof -http=:8080 foo-streama b c 7 8 9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -4009,9 +3661,6 @@ a b c 7 8 9 4 5 6 1 2 3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -4022,9 +3671,6 @@ a b c filename 7 8 9 data/b.csv 4 5 6 data/a.csv 1 2 3 data/a.csv -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## tail @@ -4052,9 +3698,6 @@ blue square 1 499872 0.618906 0.263796 0.531147 6.210738 blue triangle 0 499880 0.008111 0.826727 0.473296 6.146957 yellow triangle 0 499955 0.383942 0.559529 0.511376 4.307974 yellow circle 1 499974 0.764951 0.252842 0.499699 5.013810 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4065,9 +3708,6 @@ color shape flag i u v w x yellow triangle 0 499955 0.383942 0.559529 0.511376 4.307974 blue square 1 499872 0.618906 0.263796 0.531147 6.210738 yellow circle 1 499974 0.764951 0.252842 0.499699 5.013810 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## tee @@ -4150,9 +3790,6 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4161,9 +3798,6 @@ go tool pprof -http=:8080 foo-streamtop_idx quantity_top 1 81.2290 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -4174,9 +3808,6 @@ shape top_idx quantity_top triangle 1 81.2290 square 1 79.2778 circle 1 63.9785 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -4187,9 +3818,6 @@ shape someothername quantity_top triangle 1 81.2290 square 1 79.2778 circle 1 63.9785 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -4200,9 +3828,6 @@ color shape flag k index quantity rate purple triangle false 5 51 81.2290 8.5910 red square true 2 15 79.2778 0.0130 yellow circle true 8 73 63.9785 4.2370 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -4213,9 +3838,6 @@ color shape flag k index quantity rate yellow circle true 8 73 63.9785 4.2370 red square true 2 15 79.2778 0.0130 purple triangle false 5 51 81.2290 8.5910 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## unflatten @@ -4286,9 +3908,6 @@ green,circle orange,triangle orange,square orange,circle -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4314,9 +3933,6 @@ red triangle 1560 yellow circle 356 yellow square 589 yellow triangle 468 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4344,9 +3960,6 @@ green circle 287 orange square 128 orange triangle 107 orange circle 68 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4355,9 +3968,6 @@ go tool pprof -http=:8080 foo-streamcount 18 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe second main way to use `mlr uniq` is without group-by columns, using `-a` instead: @@ -4444,9 +4054,6 @@ red circle 1 purple square 0 red square 1 yellow triangle 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4455,9 +4062,6 @@ go tool pprof -http=:8080 foo-streamcount 7 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -4472,9 +4076,6 @@ count color shape flag 7 purple square 0 3 red square 1 2 yellow triangle 1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## unsparsify @@ -4548,9 +4149,6 @@ Examples: "w": 2 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4562,9 +4160,6 @@ a b v u x w - 2 - 1 - - 1 - 2 - 3 - - - 1 - - 2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4576,9 +4171,6 @@ a b v u x w missing 2 missing 1 missing missing 1 missing 2 missing 3 missing missing missing 1 missing missing 2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4596,9 +4188,6 @@ a v x b u v w a b u 1 2 - - - -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -4610,8 +4199,5 @@ a b v u w x - 2 - 1 - - 1 - 2 - - 3 - - 1 - 2 - -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/repl.md b/docs/src/repl.md index 4301af3433..71de28b33a 100644 --- a/docs/src/repl.md +++ b/docs/src/repl.md @@ -91,9 +91,6 @@ HELLO }GOODBYE ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Using Miller with the REPL diff --git a/docs/src/scripting.md b/docs/src/scripting.md index cdb019313c..29cac3fb71 100644 --- a/docs/src/scripting.md +++ b/docs/src/scripting.md @@ -30,9 +30,6 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Typing this out can get a bit old, if the only thing that changes for you is the filename. Some options include: @@ -75,9 +72,6 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -88,9 +82,6 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -114,9 +105,6 @@ go tool pprof -http=:8080 foo-stream "count_fraction": 0.3 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -135,9 +123,6 @@ go tool pprof -http=:8080 foo-stream "count_fraction": 0.3 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streametc. @@ -175,9 +160,6 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -188,9 +170,6 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -214,9 +193,6 @@ go tool pprof -http=:8080 foo-stream "count_fraction": 0.3 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -235,9 +211,6 @@ go tool pprof -http=:8080 foo-stream "count_fraction": 0.3 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Miller scripts on Windows @@ -274,9 +247,6 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -300,9 +270,6 @@ go tool pprof -http=:8080 foo-stream "count_fraction": 0.3 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -321,9 +288,6 @@ go tool pprof -http=:8080 foo-stream "count_fraction": 0.3 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamand so on. See also [Miller on Windows](miller-on-windows.md). diff --git a/docs/src/shapes-of-data.md b/docs/src/shapes-of-data.md index eac46a7c23..bab58b7f03 100644 --- a/docs/src/shapes-of-data.md +++ b/docs/src/shapes-of-data.md @@ -118,9 +118,6 @@ Miller records are ordered lists of key-value pairs. For NIDX format, DKVP forma
1=x,2=y,3=z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -128,9 +125,6 @@ go tool pprof -http=:8080 foo-stream
1=x,2=y,3=z,6=a,4=b,55=cde -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -138,9 +132,6 @@ go tool pprof -http=:8080 foo-stream
x,y,z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -149,9 +140,6 @@ go tool pprof -http=:8080 foo-stream## Numbering and renumbering records @@ -286,9 +259,6 @@ purple,triangle,false,7,65,80.1405,5.8240,7 yellow,circle,true,8,73,63.9785,4.2370,8 yellow,circle,true,9,87,63.5058,8.3350,9 purple,square,false,10,91,72.3735,8.2430,10 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream However, this is the record number within the original input stream -- not after any filtering you may have done: @@ -301,9 +271,6 @@ color,shape,flag,k,index,quantity,rate,nr yellow,triangle,true,1,11,43.6498,9.8870,1 yellow,circle,true,8,73,63.9785,4.2370,8 yellow,circle,true,9,87,63.5058,8.3350,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream There are two good options here. One is to use the `cat` verb with `-n`: @@ -316,9 +283,6 @@ n,color,shape,flag,k,index,quantity,rate 1,yellow,triangle,true,1,11,43.6498,9.8870 2,yellow,circle,true,8,73,63.9785,4.2370 3,yellow,circle,true,9,87,63.5058,8.3350 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The other is to keep your own counter within the `put` DSL: @@ -331,9 +295,6 @@ color,shape,flag,k,index,quantity,rate,n yellow,triangle,true,1,11,43.6498,9.8870,1 yellow,circle,true,8,73,63.9785,4.2370,2 yellow,circle,true,9,87,63.5058,8.3350,3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The difference is a matter of taste (although `mlr cat -n` puts the counter first). @@ -422,9 +383,6 @@ outer=2,middle=21,inner1=210,inner2=211 outer=3,middle=30,inner1=300,inner2=301 outer=3,middle=31,inner1=312,inner2=301 outer=3,middle=31,inner1=313,inner2=314 -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream See also the [record-heterogeneity page](record-heterogeneity.md); see in diff --git a/docs/src/shell-commands.md b/docs/src/shell-commands.md index a15ebe13f1..e22713a90b 100644 --- a/docs/src/shell-commands.md +++ b/docs/src/shell-commands.md @@ -30,9 +30,6 @@ eks pan 2 0.758679 0.522151 hello world wye wye 3 0.204603 0.338318 hello world eks wye 4 0.381399 0.134188 hello world wye pan 5 0.573288 0.863624 hello world -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream1,2,3 x,y,z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -159,9 +147,6 @@ go tool pprof -http=:8080 foo-stream1=x,999=y,3=z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -169,9 +154,6 @@ go tool pprof -http=:8080 foo-stream1=x,newname=y,3=z -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream@@ -180,9 +162,6 @@ go tool pprof -http=:8080 foo-streamThe issue is that Miller's `cut`, by default, outputs cut fields in the order they appear in the input data. This design decision was made intentionally to parallel the Unix/Linux system `cut` command, which has the same semantics. @@ -245,9 +221,6 @@ rate,shape,flag 4.2370,circle,true 8.3350,circle,true 8.2430,square,false -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream3,1,2 z,x,y -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Why doesn't mlr cut put fields in the order I want? @@ -221,9 +200,6 @@ triangle,false,5.8240 circle,true,4.2370 circle,true,8.3350 square,false,8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -45,9 +42,6 @@ eks pan 2 0.758679 0.522151 {2} wye wye 3 0.204603 0.338318 {3} eks wye 4 0.381399 0.134188 {4} wye pan 5 0.573288 0.863624 {5} -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -60,9 +54,6 @@ eks pan 2 0.758679 0.522151 585d25a8ff04840f77779eeff61167dc wye wye 3 0.204603 0.338318 fb6361a373147c163e65ada94719fa16 eks wye 4 0.381399 0.134188 585d25a8ff04840f77779eeff61167dc wye pan 5 0.573288 0.863624 fb6361a373147c163e65ada94719fa16 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamNote that running a subprocess on every record takes a non-trivial amount of time. Comparing asking the system `date` command for the current time in nanoseconds versus computing it in process: diff --git a/docs/src/sorting.md b/docs/src/sorting.md index c5e8537e77..68e1f4a026 100644 --- a/docs/src/sorting.md +++ b/docs/src/sorting.md @@ -49,9 +49,6 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Sorted numerically ascending by rate: @@ -71,9 +68,6 @@ yellow circle true 9 87 63.5058 8.3350 purple triangle false 5 51 81.2290 8.5910 red square false 6 64 77.1991 9.5310 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Sorted lexically ascending by color; then, within each color, numerically descending by quantity: @@ -93,9 +87,6 @@ red circle true 3 16 13.8103 2.9010 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Example of natural sort, adapted from [https://github.com/facette/natsort](https://github.com/facette/natsort): @@ -132,9 +123,6 @@ n name 25 Xiph Xlater 40 26 Allegia 6R Clasteron 27 Callisto Morphamax 5000 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -169,9 +157,6 @@ n name 3 Xiph Xlater 58 21 Xiph Xlater 300 14 Xiph Xlater 500 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Sorting fields within records: the sort-within-records verb @@ -215,9 +200,6 @@ b a c c b a 7 8 9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -228,9 +210,6 @@ a b c 1 2 3 5 4 6 9 8 7 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## The sort function by example @@ -249,9 +228,6 @@ go tool pprof -http=:8080 foo-stream
[1, 2, 3, 4, 5] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -264,9 +240,6 @@ go tool pprof -http=:8080 foo-stream
[5, 4, 3, 2, 1] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -279,9 +252,6 @@ go tool pprof -http=:8080 foo-stream
[1, 2, 3, 4, 5] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -294,9 +264,6 @@ go tool pprof -http=:8080 foo-stream
[5, 4, 3, 2, 1] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -313,9 +280,6 @@ go tool pprof -http=:8080 foo-stream "b": 1, "c": 2 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -332,9 +296,6 @@ go tool pprof -http=:8080 foo-stream "b": 1, "a": 3 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -358,9 +319,6 @@ go tool pprof -http=:8080 foo-stream "c": 2, "a": 3 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -384,9 +342,6 @@ go tool pprof -http=:8080 foo-stream "c": 2, "b": 1 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -399,9 +354,6 @@ go tool pprof -http=:8080 foo-stream
["a1", "a2", "a10", "a20", "a100", "a200"] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamIn the rest of this page we'll look more closely at these variants. @@ -445,9 +397,6 @@ key values alpha 1;4;5;6 beta 7;8;9;9 gamma 1;2;11;12 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Use the `"r"` flag for reverse, which is numerical descending: @@ -464,9 +413,6 @@ key values alpha 6;5;4;1 beta 9;9;8;7 gamma 12;11;2;1 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Use the `"f"` flag for lexical ascending sort (and `"fr"` would lexical descending): @@ -483,9 +429,6 @@ key values alpha 1;4;5;6 beta 7;8;9;9 gamma 1;11;12;2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Without and with case-folding: @@ -514,9 +457,6 @@ alpha,cat;bat;Australia;Bavaria;apple;Colombia key values alpha Australia;Bavaria;Colombia;apple;bat;cat alpha apple;Australia;bat;Bavaria;cat;Colombia -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Simple sorting of maps within records @@ -588,9 +528,6 @@ Also note that, unlike the `sort-within-record` verb with its `-r` flag, } } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Simple sorting of maps across records @@ -633,9 +570,6 @@ red square false 6 64 77.1991 9.5310 6 purple triangle false 7 65 80.1405 5.8240 7 yellow circle true 8 73 63.9785 4.2370 8 yellow circle true 9 87 63.5058 8.3350 9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Custom sorting of arrays within records @@ -704,9 +638,6 @@ recapitulate (for reference) what `sort` with default flags already does; the th "even_then_odd": [2, 4, 6, 8, 10, 1, 3, 5, 7, 9] } ] -Memory profile started. -Memory profile finished. 
-go tool pprof -http=:8080 foo-stream ## Custom sorting of arrays across records @@ -760,9 +691,6 @@ red square true 2 15 79.2778 0.0130 purple triangle false 7 65 80.1405 5.8240 purple triangle false 5 51 81.2290 8.5910 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Custom sorting of maps within records @@ -826,9 +754,6 @@ For example, we can sort ascending or descending by map key or map value: "b": 2, "c": 1 } -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Custom sorting of maps across records @@ -871,7 +796,4 @@ red square false 4 48 77.5542 7.4670 red circle true 3 16 13.8103 2.9010 red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream diff --git a/docs/src/special-symbols-and-formatting.md b/docs/src/special-symbols-and-formatting.md index 97cd771ca6..c12fc01bfe 100644 --- a/docs/src/special-symbols-and-formatting.md +++ b/docs/src/special-symbols-and-formatting.md @@ -45,9 +45,6 @@ Likewise [JSON](file-formats.md#json): "Role": "tester" } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream For Miller's [XTAB](file-formats.md#xtab-vertical-tabular) there is no escaping for carriage returns, but commas work fine: @@ -61,9 +58,6 @@ Role administrator Name Khavari, Darius Role tester -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream But for [key-value-pairs](file-formats.md#dkvp-key-value-pairs) and [index-numbered](file-formats.md#nidx-index-numbered-toolkit-style) formats, commas are the default field separator. And -- as of Miller 5.4.0 anyway -- there is no CSV-style double-quote-handling like there is for CSV. 
So commas within the data look like delimiters: @@ -74,9 +68,6 @@ But for [key-value-pairs](file-formats.md#dkvp-key-value-pairs) and [index-numbe
Name=Xiao, Lin,Role=administrator Name=Khavari, Darius,Role=tester -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamOne solution is to use a different delimiter, such as a pipe character: @@ -87,9 +78,6 @@ One solution is to use a different delimiter, such as a pipe character:
Name=Xiao, Lin|Role=administrator Name=Khavari, Darius|Role=tester -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamTo be extra-sure to avoid data/delimiter clashes, you can also use control @@ -101,9 +89,6 @@ characters as delimiters -- here, control-A:
Name=Xiao, Lin^ARole=administrator Name=Khavari, Darius^ARole=tester -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## How can I handle field names with special symbols in them? @@ -115,9 +100,6 @@ Simply surround the field names with curly braces:
x.a=3,y:b=4,z/c=5,product.all=60 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## How can I put single quotes into strings? @@ -133,9 +115,6 @@ $a = "It's OK, I said, then 'for now'."
a=It's OK, I said, then 'for now'. -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamSo: Miller's DSL uses double quotes for strings, and you can put single quotes (or backslash-escaped double-quotes) inside strings, no problem. @@ -147,9 +126,6 @@ Without putting the update expression in a file, it's messier:
a=It's OK, I said, 'for now'. -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe idea is that the outermost single-quotes are to protect the `put` expression from the shell, and the double quotes within them are for Miller. To get a single quote in the middle there, you need to actually put it *outside* the single-quoting for the shell. The pieces are the following, all concatenated together: @@ -179,9 +155,6 @@ a=is it?,b=it is! a is it? b it is! c is it ... -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
mlr --oxtab put '$c = ssub($a, "?"," ...")' data/question.dat @@ -190,9 +163,6 @@ go tool pprof -http=:8080 foo-stream a is it? b it is! c is it ... -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamThe @@ -216,9 +186,6 @@ The `ssub` and `gssub` functions are also handy for dealing with non-UTF-8 strin
Kaðlín og Þormundr -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamMore generally, though, we have the DSL functions @@ -252,7 +219,4 @@ See also the [page on regular expressions](reference-main-regular-expressions.md
a=14°45',degrees=14.75 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/statistics-examples.md b/docs/src/statistics-examples.md index 77252f9e9a..b1b7ea7b3d 100644 --- a/docs/src/statistics-examples.md +++ b/docs/src/statistics-examples.md @@ -29,9 +29,6 @@ For one or more specified field names, simply compute p25 and p75, then write th x_p25 0.24667037823231752 x_p75 0.7481860062358446 x_iqr 0.5015156280035271 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream For wildcarded field names, first compute p25 and p75, then loop over field names with `p25` in them: @@ -55,9 +52,6 @@ y_p75 0.7640028449996572 i_iqr 5000 x_iqr 0.5015156280035271 y_iqr 0.5118661397595003 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Computing weighted means @@ -96,7 +90,4 @@ a=eks,wmean=4890.3815931472145,mean=4956.2900763358775 a=wye,wmean=4946.987746229947,mean=4920.001017293998 a=zee,wmean=5164.719684856538,mean=5123.092330239375 a=hat,wmean=4925.533162478552,mean=4967.743946419371 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 4c80d25c0f..146f3a81e1 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -50,9 +50,6 @@ you can simply do
x_sum 4986.019681679581 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamor @@ -67,9 +64,6 @@ wye 1023.5484702619565 zee 979.7420161495838 eks 1016.7728571314786 hat 1000.192668193983 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream rather than the more tedious @@ -84,9 +78,6 @@ rather than the more tedious
x_sum 4986.019681679581 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamor @@ -106,9 +97,6 @@ wye 1023.5484702619565 zee 979.7420161495838 eks 1016.7728571314786 hat 1000.192668193983 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream The former (`mlr stats1` et al.) has the advantages of being easier to type, being less error-prone to type, and running faster. @@ -155,9 +143,6 @@ NR x x_pct 3 0.204603 0 4 0.381399 31.90825807289974 5 0.573288 66.54051068806446 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Line-number ratios @@ -185,9 +170,6 @@ I N PCT a b i x y 3 5 60 wye wye 3 0.204603 0.338318 4 5 80 eks wye 4 0.381399 0.134188 5 5 100 wye pan 5 0.573288 0.863624 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Records having max value @@ -230,9 +212,6 @@ blue purple 2 0.208785 purple purple 1 0.455077 red purple 4 0.477187 blue red 4 0.007487 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream Of course, the largest value of `n` isn't known until after all data have been read. Using an [out-of-stream variable](reference-dsl-variables.md#out-of-stream-variables) we can [retain all records as they are read](operating-on-all-records.md), then filter them at the end: @@ -272,9 +251,6 @@ purple red 5 0.454779 orange blue 5 0.705700 purple red 5 0.072936 green purple 5 0.203577 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream ## Feature-counting @@ -373,9 +349,6 @@ Then "key_fraction": 0.08333333333333333 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -400,9 +373,6 @@ latency 0.5833333333333334 name 0.3333333333333333 uid 0.25 uid2 0.08333333333333333 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Unsparsing @@ -495,9 +465,6 @@ end { "w": 2 } ] -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -509,9 +476,6 @@ a,b,v,u,x,w ,2,,1,, 1,,2,,3, ,,1,,,2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -523,9 +487,6 @@ a b v u x w - 2 - 1 - - 1 - 2 - 3 - - - 1 - - 2 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Mean without/with oosvars @@ -536,9 +497,6 @@ go tool pprof -http=:8080 foo-stream
x_mean 0.49860196816795804 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -554,9 +512,6 @@ go tool pprof -http=:8080 foo-streamx_mean 0.49860196816795804 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Keyed mean without/with oosvars @@ -591,9 +546,6 @@ zee hat 0.46772617655014515 wye zee 0.5059066170573692 eks hat 0.5006790659966355 wye eks 0.5306035254809106 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -635,9 +587,6 @@ hat zee 0.5099985721987774 hat eks 0.48587864619953547 hat hat 0.47993053101017374 hat pan 0.4643355557376876 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Variance and standard deviation without/with oosvars @@ -651,9 +600,6 @@ x_sum 4986.019681679581 x_mean 0.49860196816795804 x_var 0.08426974433144456 x_stddev 0.2902925151144007 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -681,9 +627,6 @@ sumx2 3328.652400179729 mean 0.49860196816795804 var 0.08426974433144456 stddev 0.2902925151144007 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamYou can also do this keyed, of course, imitating the keyed-mean example above. @@ -696,9 +639,6 @@ You can also do this keyed, of course, imitating the keyed-mean example above.
x_min 0.00004509679127584487 x_max 0.999952670371898 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -711,9 +651,6 @@ go tool pprof -http=:8080 foo-streamx_min 0.00004509679127584487 x_max 0.999952670371898 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Keyed min/max without/with oosvars @@ -728,9 +665,6 @@ eks 0.0006917972627396018 0.9988110946859143 wye 0.0001874794831505655 0.9998228522652893 zee 0.0005486114815762555 0.9994904324789629 hat 0.00004509679127584487 0.999952670371898 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -749,9 +683,6 @@ eks 0.0006917972627396018 0.9988110946859143 wye 0.0001874794831505655 0.9998228522652893 zee 0.0005486114815762555 0.9994904324789629 hat 0.00004509679127584487 0.999952670371898 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Delta without/with oosvars @@ -766,9 +697,6 @@ eks pan 2 0.758679 0.522151 0.411888 wye wye 3 0.204603 0.338318 -0.554076 eks wye 4 0.381399 0.134188 0.17679599999999998 wye pan 5 0.573288 0.863624 0.19188900000000003 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -784,9 +712,6 @@ eks pan 2 0.758679 0.522151 0.411888 wye wye 3 0.204603 0.338318 -0.554076 eks wye 4 0.381399 0.134188 0.17679599999999998 wye pan 5 0.573288 0.863624 0.19188900000000003 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Keyed delta without/with oosvars @@ -801,9 +726,6 @@ eks pan 2 0.758679 0.522151 0 wye wye 3 0.204603 0.338318 0 eks wye 4 0.381399 0.134188 -0.37728 wye pan 5 0.573288 0.863624 0.36868500000000004 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -819,9 +741,6 @@ eks pan 2 0.758679 0.522151 0 wye wye 3 0.204603 0.338318 0 eks wye 4 0.381399 0.134188 -0.37728 wye pan 5 0.573288 0.863624 0.36868500000000004 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream## Exponentially weighted moving averages without/with oosvars @@ -836,9 +755,6 @@ eks pan 2 0.758679 0.522151 0.3879798 wye wye 3 0.204603 0.338318 0.36964211999999996 eks wye 4 0.381399 0.134188 0.37081780799999997 wye pan 5 0.573288 0.863624 0.3910648272 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -855,7 +771,4 @@ eks pan 2 0.758679 0.522151 0.3879798 wye wye 3 0.204603 0.338318 0.36964211999999996 eks wye 4 0.381399 0.134188 0.37081780799999997 wye pan 5 0.573288 0.863624 0.3910648272 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamdiff --git a/docs/src/unix-toolkit-context.md b/docs/src/unix-toolkit-context.md index 59591d6d8a..1687f4868a 100644 --- a/docs/src/unix-toolkit-context.md +++ b/docs/src/unix-toolkit-context.md @@ -47,9 +47,6 @@ a,b,c 1,2,3 4,5,6 7,8,9 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-stream
@@ -60,9 +57,6 @@ a,b,c 7,8,9 4,5,6 1,2,3 -Memory profile started. -Memory profile finished. -go tool pprof -http=:8080 foo-streamLikewise with `mlr sort`, `mlr tac`, and so on. diff --git a/internal/pkg/version/version.go b/internal/pkg/version/version.go index f3e21ac9fb..6d6f355b16 100644 --- a/internal/pkg/version/version.go +++ b/internal/pkg/version/version.go @@ -4,4 +4,4 @@ package version // Nominally things like "6.0.0" for a release, then "6.0.0-dev" in between. // This makes it clear that a given build is on the main dev branch, not a // particular snapshot tag. -var STRING string = "6.4.0-dev" +var STRING string = "6.5.0" diff --git a/man/manpage.txt b/man/manpage.txt index df9bf4e322..1526e60544 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2,11 +2,11 @@ MILLER(1) MILLER(1) -NAME +1mNAME0m Miller -- like awk, sed, cut, join, and sort for name-indexed data such as CSV and tabular JSON. -SYNOPSIS +1mSYNOPSIS0m Usage: mlr [flags] {verb} [verb-dependent options ...] {zero or more file names} @@ -22,16 +22,16 @@ SYNOPSIS https://miller.readthedocs.io -DESCRIPTION +1mDESCRIPTION0m Miller operates on key-value-pair data while the familiar Unix tools operate on integer-indexed fields: if the natural data structure for the latter is the array, then Miller's natural data structure is the insertion-ordered hash map. This encompasses a variety of data formats, including but not limited to the familiar CSV, TSV, and JSON. (Miller can handle positionally-indexed data as a special case.) This - manpage documents mlr 6.4.0. + manpage documents mlr 6.5.0. 
-EXAMPLES +1mEXAMPLES0m mlr --icsv --opprint cat example.csv mlr --icsv --opprint sort -f shape example.csv mlr --icsv --opprint sort -f shape -nr index example.csv @@ -40,7 +40,7 @@ EXAMPLES mlr --icsv --ojson put '$ratio = $quantity / $rate' example.csv mlr --icsv --opprint --from example.csv sort -nr index then cut -f shape,quantity -FILE FORMATS +1mFILE FORMATS0m CSV/CSV-lite: comma-separated values with separate header line TSV: same but with tabs in places of commas +---------------------+ @@ -112,7 +112,7 @@ FILE FORMATS | fox jumped | Record 2: "1":"fox", "2":"jumped" +---------------------+ -HELP OPTIONS +1mHELP OPTIONS0m Type 'mlr help {topic}' for any of the following: Essentials: mlr help topics @@ -168,7 +168,7 @@ HELP OPTIONS Use 'mlr help find ...' for approximate (substring) matches, e.g. 'mlr help find map' for all things with "map" in their names. -VERB LIST +1mVERB LIST0m altkv bar bootstrap cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values fraction gap grep group-by group-like having-fields head histogram json-parse @@ -178,7 +178,7 @@ VERB LIST sort sort-within-records split stats1 stats2 step summary tac tail tee template top utf8-to-latin1 unflatten uniq unsparsify -FUNCTION LIST +1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent asserting_array asserting_bool asserting_boolean asserting_empty asserting_empty_map asserting_error asserting_float asserting_int @@ -204,7 +204,7 @@ FUNCTION LIST version ! != !=~ % & && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ -COMMENTS-IN-DATA FLAGS +1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as # This is a comment for a CSV file @@ -233,7 +233,7 @@ COMMENTS-IN-DATA FLAGS Ignore commented lines within input, with specified prefix. 
-COMPRESSED-DATA FLAGS +1mCOMPRESSED-DATA FLAGS0m Miller offers a few different ways to handle reading data files which have been compressed. @@ -286,7 +286,7 @@ COMPRESSED-DATA FLAGS --zin Uncompress zlib within the Miller process. Done by default if file ends in `.z`. -CSV/TSV-ONLY FLAGS +1mCSV/TSV-ONLY FLAGS0m These are flags which are applicable to CSV format. --allow-ragged-csv-input or --ragged or --allow-ragged-tsv-input @@ -317,7 +317,7 @@ CSV/TSV-ONLY FLAGS -N Keystroke-saver for `--implicit-csv-header --headerless-csv-output`. -FILE-FORMAT FLAGS +1mFILE-FORMAT FLAGS0m See the File formats doc page, and or `mlr help file-formats`, for more about file formats Miller supports. @@ -381,7 +381,7 @@ FILE-FORMAT FLAGS -o {format name} Use format name for output data. For example: `-o csv` is the same as `--ocsv`. -FLATTEN-UNFLATTEN FLAGS +1mFLATTEN-UNFLATTEN FLAGS0m These flags control how Miller converts record values which are maps or arrays, when input is JSON and output is non-JSON (flattening) or input is non-JSON and output is JSON (unflattening). See the Flatten/unflatten doc page for more information. @@ -402,7 +402,7 @@ FLATTEN-UNFLATTEN FLAGS `--no-auto-flatten`, instead we get `${y.1}=7,${y.2}=8,${y.3}=9`. -FORMAT-CONVERSION KEYSTROKE-SAVER FLAGS +1mFORMAT-CONVERSION KEYSTROKE-SAVER FLAGS0m As keystroke-savers for format-conversion you may use the following. The letters c, t, j, l, d, n, x, p, and m refer to formats CSV, TSV, DKVP, NIDX, JSON, JSON Lines, XTAB, PPRINT, and markdown, respectively. Note that markdown @@ -422,7 +422,7 @@ FORMAT-CONVERSION KEYSTROKE-SAVER FLAGS -p Keystroke-saver for `--nidx --fs space --repifs`. -T Keystroke-saver for `--nidx --fs tab`. -JSON-ONLY FLAGS +1mJSON-ONLY FLAGS0m These are flags which are applicable to JSON output format. --jlistwrap or --jl Wrap JSON output in outermost `[ ]`. This is the @@ -437,7 +437,7 @@ JSON-ONLY FLAGS --no-jvstack Put objects/arrays all on one line for JSON output. 
This is the default for JSON Lines output format. -LEGACY FLAGS +1mLEGACY FLAGS0m These are flags which don't do anything in the current Miller version. They are accepted as no-op flags in order to keep old scripts from breaking. @@ -468,7 +468,7 @@ LEGACY FLAGS --vflatsep Ignored as of version 6. This functionality is subsumed into JSON formatting. -MISCELLANEOUS FLAGS +1mMISCELLANEOUS FLAGS0m These are flags which don't fit into any other category. --fflush Force buffered output to be written after every output record. The default is flush output after @@ -559,7 +559,7 @@ MISCELLANEOUS FLAGS information please see https://miller.readthedocs.io/en/latest/scripting/. -OUTPUT-COLORIZATION FLAGS +1mOUTPUT-COLORIZATION FLAGS0m Miller uses colors to highlight outputs. You can specify color preferences. Note: output colorization does not work on Windows. @@ -640,14 +640,14 @@ OUTPUT-COLORIZATION FLAGS --value-color Specify the color (see `--list-color-codes` and `--list-color-names`) for record values. -PPRINT-ONLY FLAGS +1mPPRINT-ONLY FLAGS0m These are flags which are applicable to PPRINT format. --barred Prints a border around PPRINT output (not available for input). --right Right-justifies all fields for PPRINT output. -PROFILING FLAGS +1mPROFILING FLAGS0m These are flags for profiling Miller performance. --cpuprofile {CPU-profile file name} Create a CPU-profile file for performance analysis. @@ -661,7 +661,7 @@ PROFILING FLAGS must be the very first thing after 'mlr' on the command line. -SEPARATOR FLAGS +1mSEPARATOR FLAGS0m See the Separators doc page for more about record separators, field separators, and pair separators. Also see the File formats doc page, or `mlr help file-formats`, for more about the file formats Miller supports. @@ -770,7 +770,7 @@ SEPARATOR FLAGS spaces. --rs {string} Specify RS for input and output. 
-AUXILIARY COMMANDS +1mAUXILIARY COMMANDS0m Available subcommands: aux-list hex @@ -783,7 +783,7 @@ AUXILIARY COMMANDS version For more information, please invoke mlr {subcommand} --help. -MLRRC +1mMLRRC0m You can set up personal defaults via a $HOME/.mlrrc and/or ./.mlrrc. For example, if you usually process CSV, then you can put "--csv" in your .mlrrc file and that will be the default input/output format unless otherwise specified on the command line. @@ -817,7 +817,7 @@ MLRRC See also: https://miller.readthedocs.io/en/latest/customization.html -REPL +1mREPL0m Usage: mlr repl [options] {zero or more data-file names} -v Prints the expressions's AST (abstract syntax tree), which gives full transparency on the precedence and associativity rules of @@ -847,14 +847,14 @@ REPL Any data-file names are opened just as if you had waited and typed :open {filenames} at the Miller REPL prompt. -VERBS - altkv +1mVERBS0m + 1maltkv0m Usage: mlr altkv [options] Given fields with values of the form a,b,c,d,e,f emits a=b,c=d,e=f pairs. Options: -h|--help Show this message. - bar + 1mbar0m Usage: mlr bar [options] Replaces a numeric field with a number of asterisks, allowing for cheesy bar plots. These align best with --opprint or --oxtab output format. @@ -872,7 +872,7 @@ VERBS However you can make them all longer if you so desire. -h|--help Show this message. - bootstrap + 1mbootstrap0m Usage: mlr bootstrap [options] Emits an n-sample, with replacement, of the input records. See also mlr sample and mlr shuffle. @@ -881,7 +881,7 @@ VERBS Must be non-negative. -h|--help Show this message. - cat + 1mcat0m Usage: mlr cat [options] Passes input records directly to output. Most useful for format conversion. Options: @@ -892,14 +892,14 @@ VERBS --filenum Prepend current filenum (1-up) to each record. -h|--help Show this message. - check + 1mcheck0m Usage: mlr check [options] Consumes records without printing any output. Useful for doing a well-formatted check on input data. 
Options: -h|--help Show this message. - clean-whitespace + 1mclean-whitespace0m Usage: mlr clean-whitespace [options] For each record, for each field in the record, whitespace-cleans the keys and/or values. Whitespace-cleaning entails stripping leading and trailing whitespace, @@ -914,7 +914,7 @@ VERBS leave off -k as well as -v. -h|--help Show this message. - count-distinct + 1mcount-distinct0m Usage: mlr count-distinct [options] Prints number of records having distinct values for specified field names. Same as uniq -c. @@ -930,7 +930,7 @@ VERBS for distinct a field values and counts for distinct b field values separately. - count + 1mcount0m Usage: mlr count [options] Prints number of records, optionally grouped by distinct values for specified field names. Options: @@ -939,7 +939,7 @@ VERBS -o {name} Field name for output-count. Default "count". -h|--help Show this message. - count-similar + 1mcount-similar0m Usage: mlr count-similar [options] Ingests all records, then emits each record augmented by a count of the number of other records having the same group-by field values. @@ -948,7 +948,7 @@ VERBS -o {name} Field name for output-counts. Defaults to "count". -h|--help Show this message. - cut + 1mcut0m Usage: mlr cut [options] Passes through input records with specified fields included/excluded. Options: @@ -968,7 +968,7 @@ VERBS mlr cut -r -f '^status$,"sda[0-9]"' mlr cut -r -f '^status$,"sda[0-9]"i' (this is case-insensitive) - decimate + 1mdecimate0m Usage: mlr decimate [options] Passes through one of every n records, optionally by category. Options: @@ -978,7 +978,7 @@ VERBS -n {n} Decimation factor (default 10). -h|--help Show this message. - fill-down + 1mfill-down0m Usage: mlr fill-down [options] If a given record has a missing value for a given field, fill that from the corresponding value from a previous record, if any. @@ -994,14 +994,14 @@ VERBS -f Field names for fill-down. -h|--help Show this message. 
- fill-empty + 1mfill-empty0m Usage: mlr fill-empty [options] Fills empty-string fields with specified fill-value. Options: -v {string} Fill-value: defaults to "N/A" -S Don't infer type -- so '-v 0' would fill string 0 not int 0. - filter + 1mfilter0m Usage: mlr filter [options] {DSL expression} Options: -f {file name} File containing a DSL expression (see examples below). If the filename @@ -1086,7 +1086,7 @@ VERBS See also https://miller.readthedocs.io/reference-dsl for more context. - flatten + 1mflatten0m Usage: mlr flatten [options] Flattens multi-level maps to single-level ones. Example: field with name 'a' and value '{"b": { "c": 4 }}' becomes name 'a.b.c' and value 4. @@ -1095,7 +1095,7 @@ VERBS -s Separator, defaulting to mlr --flatsep value. -h|--help Show this message. - format-values + 1mformat-values0m Usage: mlr format-values [options] Applies format strings to all field values, depending on autodetected type. * If a field value is detected to be integer, applies integer format. @@ -1126,7 +1126,7 @@ VERBS -n Coerce field values autodetected as int to float, and then apply the float format. - fraction + 1mfraction0m Usage: mlr fraction [options] For each record's value in specified fields, computes the ratio of that value to the sum of values in that field over all input records. @@ -1148,7 +1148,7 @@ VERBS x=1,x_cumulative_fraction=0.1 x=2,x_cumulative_fraction=0.3 x=3,x_cumulative_fraction=0.6 and x=4,x_cumulative_fraction=1.0 - gap + 1mgap0m Usage: mlr gap [options] Emits an empty record every n records, or when certain values change. Options: @@ -1159,7 +1159,7 @@ VERBS -n is ignored if -g is present. -h|--help Show this message. - grep + 1mgrep0m Usage: mlr grep [options] {regular expression} Passes through records which match the regular expression. Options: @@ -1178,18 +1178,18 @@ VERBS features of system grep, you can do "mlr --odkvp ... | grep ... | mlr --idkvp ..." 
- group-by + 1mgroup-by0m Usage: mlr group-by [options] {comma-separated field names} Outputs records in batches having identical values at specified field names.Options: -h|--help Show this message. - group-like + 1mgroup-like0m Usage: mlr group-like [options] Outputs records in batches having identical field names. Options: -h|--help Show this message. - having-fields + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. Options: @@ -1205,7 +1205,7 @@ VERBS mlr having-fields --any-matching '"sda[0-9]"' mlr having-fields --any-matching '"sda[0-9]"i' (this is case-insensitive) - head + 1mhead0m Usage: mlr head [options] Passes through the first n records, optionally by category. Without -g, ceases consuming more input (i.e. is fast) when n records have been read. @@ -1214,7 +1214,7 @@ VERBS -n {n} Head-count to print. Default 10. -h|--help Show this message. - histogram + 1mhistogram0m Just a histogram. Input values < lo or > hi are not counted. Usage: mlr histogram [options] -f {a,b,c} Value-field names for histogram counts @@ -1226,14 +1226,14 @@ VERBS -o {prefix} Prefix for output field name. Default: no prefix. -h|--help Show this message. - json-parse + 1mjson-parse0m Usage: mlr json-parse [options] Tries to convert string field values to parsed JSON, e.g. "[1,2,3]" -> [1,2,3]. Options: -f {...} Comma-separated list of field names to json-parse (default all). -h|--help Show this message. - json-stringify + 1mjson-stringify0m Usage: mlr json-stringify [options] Produces string field values from field-value data, e.g. [1,2,3] -> "[1,2,3]". Options: @@ -1242,7 +1242,7 @@ VERBS --no-jvstack Produce single-line JSON output per record (default). -h|--help Show this message. - join + 1mjoin0m Usage: mlr join [options] Joins records from specified left file name with records from all file names at the end of the Miller argument list. 
@@ -1295,7 +1295,7 @@ VERBS Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#join for more information including examples. - label + 1mlabel0m Usage: mlr label [options] {new1,new2,new3,...} Given n comma-separated names, renames the first n fields of each record to have the respective name. (Fields past the nth are left with their original @@ -1305,14 +1305,14 @@ VERBS Options: -h|--help Show this message. - latin1-to-utf8 + 1mlatin1-to-utf80m Usage: mlr latin1-to-utf8, with no options. Recursively converts record strings from Latin-1 to UTF-8. For field-level control, please see the latin1_to_utf8 DSL function. Options: -h|--help Show this message. - least-frequent + 1mleast-frequent0m Usage: mlr least-frequent [options] Shows the least frequently occurring distinct values for specified field names. The first entry is the statistical anti-mode; the remaining are runners-up. @@ -1323,7 +1323,7 @@ VERBS -o {name} Field name for output count. Default "count". See also "mlr most-frequent". - merge-fields + 1mmerge-fields0m Usage: mlr merge-fields [options] Computes univariate statistics for each input record, accumulated across specified fields. @@ -1372,7 +1372,7 @@ VERBS since "a_in_x" and "a_out_x" both collapse to "a_x", "b_in_y" collapses to "b_y", and "b_out_x" collapses to "b_x". - most-frequent + 1mmost-frequent0m Usage: mlr most-frequent [options] Shows the most frequently occurring distinct values for specified field names. The first entry is the statistical mode; the remaining are runners-up. @@ -1383,7 +1383,7 @@ VERBS -o {name} Field name for output count. Default "count". See also "mlr least-frequent". - nest + 1mnest0m Usage: mlr nest [options] Explodes specified field values into separate fields/records, or reverses this. Options: @@ -1432,14 +1432,14 @@ VERBS e.g. by default the former is semicolon and the latter is comma. See also mlr reshape. - nothing + 1mnothing0m Usage: mlr nothing [options] Drops all input records. 
Useful for testing, or after tee/print/etc. have produced other output. Options: -h|--help Show this message. - put + 1mput0m Usage: mlr put [options] {DSL expression} Options: -f {file name} File containing a DSL expression (see examples below). If the filename @@ -1519,19 +1519,19 @@ VERBS See also https://miller.readthedocs.io/reference-dsl for more context. - regularize + 1mregularize0m Usage: mlr regularize [options] Outputs records sorted lexically ascending by keys. Options: -h|--help Show this message. - remove-empty-columns + 1mremove-empty-columns0m Usage: mlr remove-empty-columns [options] Omits fields which are empty on every input row. Non-streaming. Options: -h|--help Show this message. - rename + 1mrename0m Usage: mlr rename [options] {old1,new1,old2,new2,...} Renames specified fields. Options: @@ -1554,7 +1554,7 @@ VERBS mlr rename -r 'Date_([0-9]+).*,\1' Rename all such fields to be of the form 20151015 mlr rename -r '"name"i,Name' Rename "name", "Name", "NAME", etc. to "Name" - reorder + 1mreorder0m Usage: mlr reorder [options] Moves specified names to start of record, or end of record. Options: @@ -1572,7 +1572,7 @@ VERBS mlr reorder -f a,b sends input record "d=4,b=2,a=1,c=3" to "a=1,b=2,d=4,c=3". mlr reorder -e -f a,b sends input record "d=4,b=2,a=1,c=3" to "d=4,c=3,a=1,b=2". - repeat + 1mrepeat0m Usage: mlr repeat [options] Copies input records to output records multiple times. Options must be exactly one of the following: @@ -1599,7 +1599,7 @@ VERBS a=1,b=2,c=3 a=1,b=2,c=3 - reshape + 1mreshape0m Usage: mlr reshape [options] Wide-to-long options: -i {input field names} -o {key-field name,value-field name} @@ -1658,7 +1658,7 @@ VERBS 2009-01-03 0.98012375 1.3179287 See also mlr nest. - sample + 1msample0m Usage: mlr sample [options] Reservoir sampling (subsampling without replacement), optionally by category. See also mlr bootstrap and mlr shuffle. 
@@ -1667,7 +1667,7 @@ VERBS -k {k} Required: number of records to output in total, or by group if using -g. -h|--help Show this message. - sec2gmtdate + 1msec2gmtdate0m Usage: ../c/mlr sec2gmtdate {comma-separated list of field names} Replaces a numeric field representing seconds since the epoch with the corresponding GMT year-month-day timestamp; leaves non-numbers as-is. @@ -1676,7 +1676,7 @@ VERBS is the same as ../c/mlr put '$time1=sec2gmtdate($time1);$time2=sec2gmtdate($time2)' - sec2gmt + 1msec2gmt0m Usage: mlr sec2gmt [options] {comma-separated list of field names} Replaces a numeric field representing seconds since the epoch with the corresponding GMT timestamp; leaves non-numbers as-is. This is nothing @@ -1691,7 +1691,7 @@ VERBS --nanos Input numbers are treated as nanoseconds since the epoch. -h|--help Show this message. - seqgen + 1mseqgen0m Usage: mlr seqgen [options] Passes input records directly to output. Most useful for format conversion. Produces a sequence of counters. Discards the input record stream. Produces @@ -1707,21 +1707,21 @@ VERBS stop, and step are all integers. Step may be negative. It may not be zero unless start == stop. - shuffle + 1mshuffle0m Usage: mlr shuffle [options] Outputs records randomly permuted. No output records are produced until all input records are read. See also mlr bootstrap and mlr sample. Options: -h|--help Show this message. - skip-trivial-records + 1mskip-trivial-records0m Usage: mlr skip-trivial-records [options] Passes through all records except those with zero fields, or those for which all fields have empty value. Options: -h|--help Show this message. - sort + 1msort0m Usage: mlr sort {flags} Sorts records primarily by the first specified field, secondarily by the second field, and so on. 
(Any records not having all specified sort keys will appear @@ -1746,14 +1746,14 @@ VERBS which is the same as: mlr sort -f a -f b -nr x -nr y -nr z - sort-within-records + 1msort-within-records0m Usage: mlr sort-within-records [options] Outputs records sorted lexically ascending by keys. Options: -r Recursively sort subobjects/submaps, e.g. for JSON input. -h|--help Show this message. - split + 1msplit0m Usage: mlr split [options] {filename} Options: -n {n}: Cap file sizes at N records. @@ -1793,7 +1793,7 @@ VERBS See also the "tee" DSL function which lets you do more ad-hoc customization. - stats1 + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across the input record stream. @@ -1856,7 +1856,7 @@ VERBS In particular, 1 and 1.0 are distinct text for count and mode. * When there are mode ties, the first-encountered datum wins. - stats2 + 1mstats20m Usage: mlr stats2 [options] Computes bivariate statistics for one or more given field-name pairs, accumulated across the input record stream. @@ -1884,7 +1884,7 @@ VERBS Example: mlr stats2 -a linreg-ols,r2 -f x,y -g size,shape Example: mlr stats2 -a corr -f x,y - step + 1mstep0m Usage: mlr step [options] Computes values dependent on earlier/later records, optionally grouped by category. Options: @@ -1927,7 +1927,7 @@ VERBS https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. - summary + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. @@ -1956,7 +1956,7 @@ VERBS uof upper outer fence: p75 + 3.0 * iqr Default summarizers: - field_type count mean min median max null_count distinct_count + field_type count mean min max null_count distinct_count Notes: * min, p25, median, p75, and max work for strings as well as numbers @@ -1969,13 +1969,13 @@ VERBS --all Use all available summarizers. -h|--help Show this message. 
- tac + 1mtac0m Usage: mlr tac [options] Prints records in reverse order from the order in which they were encountered. Options: -h|--help Show this message. - tail + 1mtail0m Usage: mlr tail [options] Passes through the last n records, optionally by category. Options: @@ -1983,7 +1983,7 @@ VERBS -n {n} Tail-count to print. Default 10. -h|--help Show this message. - tee + 1mtee0m Usage: mlr tee [options] {filename} Options: -a Append to existing file, if any, rather than overwriting. @@ -1995,7 +1995,7 @@ VERBS -h|--help Show this message. - template + 1mtemplate0m Usage: mlr template [options] Places input-record fields in the order specified by list of column names. If the input record is missing a specified field, it will be filled with the fill-with. @@ -2010,7 +2010,7 @@ VERBS * Input record is c=3,a=1,f=6. * Output record is a=1,b=,c=3. - top + 1mtop0m Usage: mlr top [options] -f {a,b,c} Value-field names for top counts. -g {d,e,f} Optional group-by-field names for top counts. @@ -2028,14 +2028,14 @@ VERBS from -f, fields from -g, and the top-index field are emitted. For more information please see https://miller.readthedocs.io/en/latest/reference-verbs#top - utf8-to-latin1 + 1mutf8-to-latin10m Usage: mlr utf8-to-latin1, with no options. Recursively converts record strings from UTF-8 to Latin-1. For field-level control, please see the utf8_to_latin1 DSL function. Options: -h|--help Show this message. - unflatten + 1munflatten0m Usage: mlr unflatten [options] Reverses flatten. Example: field with name 'a.b.c' and value 4 becomes name 'a' and value '{"b": { "c": 4 }}'. @@ -2044,7 +2044,7 @@ VERBS -s {string} Separator, defaulting to mlr --flatsep value. -h|--help Show this message. - uniq + 1muniq0m Usage: mlr uniq [options] Prints distinct values for specified field names. With -c, same as count-distinct. For uniq, -f is a synonym for -g. @@ -2059,7 +2059,7 @@ VERBS With -n, produces only one record which is the unique-record count.
With neither -c nor -n, produces unique records. - unsparsify + 1munsparsify0m Usage: mlr unsparsify [options] Prints records with the union of field names over all input records. For field names absent in a given record but present in others, fills in @@ -2074,239 +2074,239 @@ VERBS being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and 'a=,b=3,c=4'. -FUNCTIONS FOR FILTER/PUT - abs +1mFUNCTIONS FOR FILTER/PUT0m + 1mabs0m (class=math #args=1) Absolute value. - acos + 1macos0m (class=math #args=1) Inverse trigonometric cosine. - acosh + 1macosh0m (class=math #args=1) Inverse hyperbolic cosine. - any + 1many0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for any array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: Array example: any([10,20,30], func(e) {return $index == e}) Map example: any({"a": "foo", "b": "bar"}, func(k,v) {return $[k] == v}) - append + 1mappend0m (class=collections #args=2) Appends second argument to end of first argument, which must be an array. - apply + 1mapply0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, applies the function to each element of the array/map. For arrays, the function should take one argument, for array element; it should return a new element. For maps, it should take two arguments, for map-element key and value; it should return a new key-value pair (i.e. a single-entry map). Examples: Array example: apply([1,2,3,4,5], func(e) {return e ** 3}) returns [1, 8, 27, 64, 125]. 
Map example: apply({"a":1, "b":3, "c":5}, func(k,v) {return {toupper(k): v ** 2}}) returns {"A": 1, "B":9, "C": 25}", - arrayify + 1marrayify0m (class=collections #args=1) Walks through a nested map/array, converting any map with consecutive keys "1", "2", ... into an array. Useful to wrap the output of unflatten. - asin + 1masin0m (class=math #args=1) Inverse trigonometric sine. - asinh + 1masinh0m (class=math #args=1) Inverse hyperbolic sine. - asserting_absent + 1masserting_absent0m (class=typing #args=1) Aborts with an error if is_absent on the argument returns false, else returns its argument. - asserting_array + 1masserting_array0m (class=typing #args=1) Aborts with an error if is_array on the argument returns false, else returns its argument. - asserting_bool + 1masserting_bool0m (class=typing #args=1) Aborts with an error if is_bool on the argument returns false, else returns its argument. - asserting_boolean + 1masserting_boolean0m (class=typing #args=1) Aborts with an error if is_boolean on the argument returns false, else returns its argument. - asserting_empty + 1masserting_empty0m (class=typing #args=1) Aborts with an error if is_empty on the argument returns false, else returns its argument. - asserting_empty_map + 1masserting_empty_map0m (class=typing #args=1) Aborts with an error if is_empty_map on the argument returns false, else returns its argument. - asserting_error + 1masserting_error0m (class=typing #args=1) Aborts with an error if is_error on the argument returns false, else returns its argument. - asserting_float + 1masserting_float0m (class=typing #args=1) Aborts with an error if is_float on the argument returns false, else returns its argument. - asserting_int + 1masserting_int0m (class=typing #args=1) Aborts with an error if is_int on the argument returns false, else returns its argument. - asserting_map + 1masserting_map0m (class=typing #args=1) Aborts with an error if is_map on the argument returns false, else returns its argument. 
- asserting_nonempty_map + 1masserting_nonempty_map0m (class=typing #args=1) Aborts with an error if is_nonempty_map on the argument returns false, else returns its argument. - asserting_not_array + 1masserting_not_array0m (class=typing #args=1) Aborts with an error if is_not_array on the argument returns false, else returns its argument. - asserting_not_empty + 1masserting_not_empty0m (class=typing #args=1) Aborts with an error if is_not_empty on the argument returns false, else returns its argument. - asserting_not_map + 1masserting_not_map0m (class=typing #args=1) Aborts with an error if is_not_map on the argument returns false, else returns its argument. - asserting_not_null + 1masserting_not_null0m (class=typing #args=1) Aborts with an error if is_not_null on the argument returns false, else returns its argument. - asserting_null + 1masserting_null0m (class=typing #args=1) Aborts with an error if is_null on the argument returns false, else returns its argument. - asserting_numeric + 1masserting_numeric0m (class=typing #args=1) Aborts with an error if is_numeric on the argument returns false, else returns its argument. - asserting_present + 1masserting_present0m (class=typing #args=1) Aborts with an error if is_present on the argument returns false, else returns its argument. - asserting_string + 1masserting_string0m (class=typing #args=1) Aborts with an error if is_string on the argument returns false, else returns its argument. - atan + 1matan0m (class=math #args=1) One-argument arctangent. - atan2 + 1matan20m (class=math #args=2) Two-argument arctangent. - atanh + 1matanh0m (class=math #args=1) Inverse hyperbolic tangent. - bitcount + 1mbitcount0m (class=arithmetic #args=1) Count of 1-bits. - boolean + 1mboolean0m (class=conversion #args=1) Convert int/float/bool/string to boolean. - capitalize + 1mcapitalize0m (class=string #args=1) Convert string's first character to uppercase. - cbrt + 1mcbrt0m (class=math #args=1) Cube root. 
- ceil + 1mceil0m (class=math #args=1) Ceiling: nearest integer at or above. - clean_whitespace + 1mclean_whitespace0m (class=string #args=1) Same as collapse_whitespace and strip. - collapse_whitespace + 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. - concat + 1mconcat0m (class=collections #args=variadic) Returns the array concatenation of the arguments. Non-array arguments are treated as single-element arrays. Examples: concat(1,2,3) is [1,2,3] concat([1,2],3) is [1,2,3] concat([1,2],[3]) is [1,2,3] - cos + 1mcos0m (class=math #args=1) Trigonometric cosine. - cosh + 1mcosh0m (class=math #args=1) Hyperbolic cosine. - depth + 1mdepth0m (class=collections #args=1) Prints maximum depth of map/array. Scalars have depth 0. - dhms2fsec + 1mdhms2fsec0m (class=time #args=1) Recovers floating-point seconds as in dhms2fsec("5d18h53m20.250000s") = 500000.250000 - dhms2sec + 1mdhms2sec0m (class=time #args=1) Recovers integer seconds as in dhms2sec("5d18h53m20s") = 500000 - erf + 1merf0m (class=math #args=1) Error function. - erfc + 1merfc0m (class=math #args=1) Complementary error function. - every + 1mevery0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, yields a boolean true if the argument function returns true for every array/map element, false otherwise. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. 
Examples: Array example: every(["a", "b", "c"], func(e) {return $[e] >= 0}) Map example: every({"a": "foo", "b": "bar"}, func(k,v) {return $[k] == v}) - exec + 1mexec0m (class=system #args=variadic) '$output = exec( "command", ["arg1", "arg2"], {"env": ["ENV_VAR=ENV_VALUE", "ENV_VAR2=ENV_VALUE2"], "dir": "/tmp/run_command_here", "stdin_string": "this is input fed to program", "combined_output": true )' Run a command via executable, path, args and environment, yielding its stdout minus final carriage return. Example: exec("echo", ["I don't do", "$SHELL things"], {"env": "SHELL=sh"}) outputs "I don't do $SHELL things" - exp + 1mexp0m (class=math #args=1) Exponential function e**x. - expm1 + 1mexpm10m (class=math #args=1) e**x - 1. - flatten + 1mflatten0m (class=collections #args=2,3) Flattens multi-level maps to single-level ones. Useful for nested JSON-like structures for non-JSON file formats like CSV. With two arguments, the first argument is a map (maybe $*) and the second argument is the flatten separator. With three arguments, the first argument is prefix, the second is the flatten separator, and the third argument is a map; flatten($*, ".") is the same as flatten("", ".", $*). See "Flatten/unflatten: converting between JSON and tabular formats" at https://miller.readthedocs.io for more information. Examples: flatten({"a":[1,2],"b":3}, ".") is {"a.1": 1, "a.2": 2, "b": 3}. flatten("a", ".", {"b": { "c": 4 }}) is {"a.b.c" : 4}. flatten("", ".", {"a": { "b": 3 }}) is {"a.b" : 3}. - float + 1mfloat0m (class=conversion #args=1) Convert int/float/bool/string to float. - floor + 1mfloor0m (class=math #args=1) Floor: nearest integer at or below. - fmtifnum + 1mfmtifnum0m (class=conversion #args=2) Identical to fmtnum, except returns the first argument as-is if the output would be an error. 
Examples: fmtifnum(3.4, "%.6f") gives 3.400000 fmtifnum("abc", "%.6f") gives abc $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone - fmtnum + 1mfmtnum0m (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. Example: $x = fmtnum($x, "%.6f") - fold + 1mfold0m (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument. Examples: Array example: fold([1,2,3,4,5], func(acc,e) {return acc + e**3}, 10000) returns 10225. Map example: fold({"a":1, "b":3, "c": 5}, func(acck,accv,ek,ev) {return {"sum": accv+ev**2}}, {"sum":10000}) returns 10035. - format + 1mformat0m (class=string #args=variadic) Using first argument as format string, interpolate remaining arguments in place of each "{}" in the format string. Too-few arguments are treated as the empty string; too-many arguments are discarded. Examples: format("{}:{}:{}", 1,2) gives "1:2:". format("{}:{}:{}", 1,2,3) gives "1:2:3". format("{}:{}:{}", 1,2,3,4) gives "1:2:3".
- fsec2dhms + 1mfsec2dhms0m (class=time #args=1) Formats floating-point seconds as in fsec2dhms(500000.25) = "5d18h53m20.250000s" - fsec2hms + 1mfsec2hms0m (class=time #args=1) Formats floating-point seconds as in fsec2hms(5000.25) = "01:23:20.250000" - get_keys + 1mget_keys0m (class=collections #args=1) Returns array of keys of map or array - get_values + 1mget_values0m (class=collections #args=1) Returns array of values of map or array -- in the latter case, returns a copy of the array - gmt2localtime + 1mgmt2localtime0m (class=time #args=1,2) Convert from a GMT-time string to a local-time string. Consulting $TZ unless second argument is supplied. Examples: gmt2localtime("1999-12-31T22:00:00Z") = "2000-01-01 00:00:00" with TZ="Asia/Istanbul" gmt2localtime("1999-12-31T22:00:00Z", "Asia/Istanbul") = "2000-01-01 00:00:00" - gmt2sec + 1mgmt2sec0m (class=time #args=1) Parses GMT timestamp as integer seconds since the epoch. Example: gmt2sec("2001-02-03T04:05:06Z") = 981173106 - gssub + 1mgssub0m (class=string #args=3) Like gsub but does no regexing. No characters are special. Example: gssub("ab.d.fg", ".", "X") gives "abXdXfg" - gsub + 1mgsub0m (class=string #args=3) '$name = gsub($name, "old", "new")': replace all, with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to gsub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. Examples: gsub("ababab", "ab", "XY") gives "XYXYXY" @@ -2315,244 +2315,244 @@ FUNCTIONS FOR FILTER/PUT gsub("abcdefg", "[ce]", "X") gives "abXdXfg" gsub("prefix4529:suffix8567", "(....ix)([0-9]+)", "[\1 : \2]") gives "[prefix : 4529]:[suffix : 8567]" - haskey + 1mhaskey0m (class=collections #args=2) True/false if map has/hasn't key, e.g. 'haskey($*, "a")' or 'haskey(mymap, mykey)', or true/false if array index is in bounds / out of bounds. 
Error if 1st argument is not a map or array. Note -n..-1 alias to 1..n in Miller arrays. - hexfmt + 1mhexfmt0m (class=conversion #args=1) Convert int to hex string, e.g. 255 to "0xff". - hms2fsec + 1mhms2fsec0m (class=time #args=1) Recovers floating-point seconds as in hms2fsec("01:23:20.250000") = 5000.250000 - hms2sec + 1mhms2sec0m (class=time #args=1) Recovers integer seconds as in hms2sec("01:23:20") = 5000 - hostname + 1mhostname0m (class=system #args=0) Returns the hostname as a string. - int + 1mint0m (class=conversion #args=1) Convert int/float/bool/string to int. - invqnorm + 1minvqnorm0m (class=math #args=1) Inverse of normal cumulative distribution function. Note that invqnorm(urand()) is normally distributed. - is_absent + 1mis_absent0m (class=typing #args=1) False if field is present in input, true otherwise - is_array + 1mis_array0m (class=typing #args=1) True if argument is an array. - is_bool + 1mis_bool0m (class=typing #args=1) True if field is present with boolean value. Synonymous with is_boolean. - is_boolean + 1mis_boolean0m (class=typing #args=1) True if field is present with boolean value. Synonymous with is_bool. - is_empty + 1mis_empty0m (class=typing #args=1) True if field is present in input with empty string value, false otherwise. - is_empty_map + 1mis_empty_map0m (class=typing #args=1) True if argument is a map which is empty. - is_error + 1mis_error0m (class=typing #args=1) True if argument is an error, such as taking string length of an integer. - is_float + 1mis_float0m (class=typing #args=1) True if field is present with value inferred to be float - is_int + 1mis_int0m (class=typing #args=1) True if field is present with value inferred to be int - is_map + 1mis_map0m (class=typing #args=1) True if argument is a map. - is_nan + 1mis_nan0m (class=typing #args=1) True if the argument is the NaN (not-a-number) floating-point value. Note that NaN has the property that NaN != NaN, so you need 'is_nan(x)' rather than 'x == NaN'.
- is_nonempty_map + 1mis_nonempty_map0m (class=typing #args=1) True if argument is a map which is non-empty. - is_not_array + 1mis_not_array0m (class=typing #args=1) True if argument is not an array. - is_not_empty + 1mis_not_empty0m (class=typing #args=1) True if field is present in input with non-empty value, false otherwise - is_not_map + 1mis_not_map0m (class=typing #args=1) True if argument is not a map. - is_not_null + 1mis_not_null0m (class=typing #args=1) False if argument is null (empty, absent, or JSON null), true otherwise. - is_null + 1mis_null0m (class=typing #args=1) True if argument is null (empty, absent, or JSON null), false otherwise. - is_numeric + 1mis_numeric0m (class=typing #args=1) True if field is present with value inferred to be int or float - is_present + 1mis_present0m (class=typing #args=1) True if field is present in input, false otherwise. - is_string + 1mis_string0m (class=typing #args=1) True if field is present with string (including empty-string) value - joink + 1mjoink0m (class=conversion #args=2) Makes string from map/array keys. First argument is map/array; second is separator string. Examples: joink({"a":3,"b":4,"c":5}, ",") = "a,b,c". joink([1,2,3], ",") = "1,2,3". - joinkv + 1mjoinkv0m (class=conversion #args=3) Makes string from map/array key-value pairs. First argument is map/array; second is pair-separator string; third is field-separator string. Mnemonic: the "=" comes before the "," in the output and in the arguments to joinkv. Examples: joinkv([3,4,5], "=", ",") = "1=3,2=4,3=5" joinkv({"a":3,"b":4,"c":5}, ":", ";") = "a:3;b:4;c:5" - joinv + 1mjoinv0m (class=conversion #args=2) Makes string from map/array values. First argument is map/array; second is separator string. Examples: joinv([3,4,5], ",") = "3,4,5" joinv({"a":3,"b":4,"c":5}, ",") = "3,4,5" - json_parse + 1mjson_parse0m (class=collections #args=1) Converts value from JSON-formatted string. 
- json_stringify + 1mjson_stringify0m (class=collections #args=1,2) Converts value to JSON-formatted string. Default output is single-line. With optional second boolean argument set to true, produces multiline output. - latin1_to_utf8 + 1mlatin1_to_utf80m (class=string #args=1) Tries to convert Latin-1-encoded string to UTF-8-encoded string. If argument is array or map, recurses into it. Examples: $y = latin1_to_utf8($x) $* = latin1_to_utf8($*) - leafcount + 1mleafcount0m (class=collections #args=1) Counts total number of terminal values in map/array. For single-level map/array, same as length. - length + 1mlength0m (class=collections #args=1) Counts number of top-level entries in array/map. Scalars have length 1. - localtime2gmt + 1mlocaltime2gmt0m (class=time #args=1,2) Convert from a local-time string to a GMT-time string. Consults $TZ unless second argument is supplied. Examples: localtime2gmt("2000-01-01 00:00:00") = "1999-12-31T22:00:00Z" with TZ="Asia/Istanbul" localtime2gmt("2000-01-01 00:00:00", "Asia/Istanbul") = "1999-12-31T22:00:00Z" - localtime2sec + 1mlocaltime2sec0m (class=time #args=1,2) Parses local timestamp as integer seconds since the epoch. Consults $TZ environment variable, unless second argument is supplied. Examples: localtime2sec("2001-02-03 04:05:06") = 981165906 with TZ="Asia/Istanbul" localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906 - log + 1mlog0m (class=math #args=1) Natural (base-e) logarithm. - log10 + 1mlog100m (class=math #args=1) Base-10 logarithm. - log1p + 1mlog1p0m (class=math #args=1) log(1+x). - logifit + 1mlogifit0m (class=math #args=3) Given m and b from logistic regression, compute fit: $yhat=logifit($x,$m,$b). - lstrip + 1mlstrip0m (class=string #args=1) Strip leading whitespace from string. - madd + 1mmadd0m (class=arithmetic #args=3) a + b mod m (integers) - mapdiff + 1mmapdiff0m (class=collections #args=variadic) With 0 args, returns empty map. With 1 arg, returns copy of arg.
With 2 or more, returns copy of arg 1 with all keys from any of remaining argument maps removed. - mapexcept + 1mmapexcept0m (class=collections #args=variadic) Returns a map with keys from remaining arguments, if any, unset. Remaining arguments can be strings or arrays of string. E.g. 'mapexcept({1:2,3:4,5:6}, 1, 5, 7)' is '{3:4}' and 'mapexcept({1:2,3:4,5:6}, [1, 5, 7])' is '{3:4}'. - mapselect + 1mmapselect0m (class=collections #args=variadic) Returns a map with only keys from remaining arguments set. Remaining arguments can be strings or arrays of string. E.g. 'mapselect({1:2,3:4,5:6}, 1, 5, 7)' is '{1:2,5:6}' and 'mapselect({1:2,3:4,5:6}, [1, 5, 7])' is '{1:2,5:6}'. - mapsum + 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. - max + 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. - md5 + 1mmd50m (class=hashing #args=1) MD5 hash. - mexp + 1mmexp0m (class=arithmetic #args=3) a ** b mod m (integers) - min + 1mmin0m (class=math #args=variadic) Min of n numbers; null loses. - mmul + 1mmmul0m (class=arithmetic #args=3) a * b mod m (integers) - msub + 1mmsub0m (class=arithmetic #args=3) a - b mod m (integers) - os + 1mos0m (class=system #args=0) Returns the operating-system name as a string. - pow + 1mpow0m (class=arithmetic #args=2) Exponentiation. Same as **, but as a function. - qnorm + 1mqnorm0m (class=math #args=1) Normal cumulative distribution function. - reduce + 1mreduce0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element, and return the accumulated element. 
For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is the first element for arrays, or the first element's key-value pair for maps. Examples: Array example: reduce([1,2,3,4,5], func(acc,e) {return acc + e**3}) returns 225. Map example: reduce({"a":1, "b":3, "c": 5}, func(acck,accv,ek,ev) {return {"sum_of_squares": accv + ev**2}}) returns {"sum_of_squares": 35}. - regextract + 1mregextract0m (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does. Examples: regextract("index ab09 file", "[a-z][a-z][0-9][0-9]") gives "ab09" regextract("index a999 file", "[a-z][a-z][0-9][0-9]") gives (absent), which will result in an assignment not happening. - regextract_or_else + 1mregextract_or_else0m (class=string #args=3) Like regextract but the third argument is the return value in case the input string (first argument) doesn't match the pattern (second argument). Examples: regextract_or_else("index ab09 file", "[a-z][a-z][0-9][0-9]", "nonesuch") gives "ab09" regextract_or_else("index a999 file", "[a-z][a-z][0-9][0-9]", "nonesuch") gives "nonesuch" - round + 1mround0m (class=math #args=1) Round to nearest integer. - roundm + 1mroundm0m (class=math #args=2) Round to nearest multiple of m: roundm($x,$m) is the same as round($x/$m)*$m. - rstrip + 1mrstrip0m (class=string #args=1) Strip trailing whitespace from string. - sec2dhms + 1msec2dhms0m (class=time #args=1) Formats integer seconds as in sec2dhms(500000) = "5d18h53m20s" - sec2gmt + 1msec2gmt0m (class=time #args=1,2) Formats seconds since epoch as GMT timestamp. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part. 
Examples: sec2gmt(1234567890) = "2009-02-13T23:31:30Z" sec2gmt(1234567890.123456) = "2009-02-13T23:31:30Z" sec2gmt(1234567890.123456, 6) = "2009-02-13T23:31:30.123456Z" - sec2gmtdate + 1msec2gmtdate0m (class=time #args=1) Formats seconds since epoch (integer part) as GMT timestamp with year-month-date. Leaves non-numbers as-is. Example: sec2gmtdate(1440768801.7) = "2015-08-28". - sec2hms + 1msec2hms0m (class=time #args=1) Formats integer seconds as in sec2hms(5000) = "01:23:20" - sec2localdate + 1msec2localdate0m (class=time #args=1,2) Formats seconds since epoch (integer part) as local timestamp with year-month-date. Leaves non-numbers as-is. Consults $TZ environment variable unless second argument is supplied. Examples: sec2localdate(1440768801.7) = "2015-08-28" with TZ="Asia/Istanbul" sec2localdate(1440768801.7, "Asia/Istanbul") = "2015-08-28" - sec2localtime + 1msec2localtime0m (class=time #args=1,2,3) Formats seconds since epoch (integer part) as local timestamp. Consults $TZ environment variable unless third argument is supplied. Leaves non-numbers as-is. With second integer argument n, includes n decimal places for the seconds part Examples: sec2localtime(1234567890) = "2009-02-14 01:31:30" with TZ="Asia/Istanbul" @@ -2560,31 +2560,31 @@ FUNCTIONS FOR FILTER/PUT sec2localtime(1234567890.123456, 6) = "2009-02-14 01:31:30.123456" with TZ="Asia/Istanbul" sec2localtime(1234567890.123456, 6, "Asia/Istanbul") = "2009-02-14 01:31:30.123456" - select + 1mselect0m (class=higher-order-functions #args=2) Given a map or array as first argument and a function as second argument, includes each input element in the output if the function returns true. For arrays, the function should take one argument, for array element; for maps, it should take two, for map-element key and value. In either case it should return a boolean. Examples: Array example: select([1,2,3,4,5], func(e) {return e >= 3}) returns [3, 4, 5]. 
Map example: select({"a":1, "b":3, "c":5}, func(k,v) {return v >= 3}) returns {"b":3, "c": 5}. - sgn + 1msgn0m (class=math #args=1) +1, 0, -1 for positive, zero, negative input respectively. - sha1 + 1msha10m (class=hashing #args=1) SHA1 hash. - sha256 + 1msha2560m (class=hashing #args=1) SHA256 hash. - sha512 + 1msha5120m (class=hashing #args=1) SHA512 hash. - sin + 1msin0m (class=math #args=1) Trigonometric sine. - sinh + 1msinh0m (class=math #args=1) Hyperbolic sine. - sort + 1msort0m (class=higher-order-functions #args=1-2) Given a map or array as first argument and string flags or function as optional second argument, returns a sorted copy of the input. With one argument, sorts array elements with numbers first numerically and then strings lexically, and map elements likewise by map keys. If the second argument is a string, it can contain any of "f" for lexical ("n" is for the above default), "c" for case-folded lexical, or "t" for natural sort order. An additional "r" in that string is for reverse. An additional "v" in that string means sort maps by value, rather than by key. If the second argument is a function, then for arrays it should take two arguments a and b, returning < 0, 0, or > 0 as a < b, a == b, or a > b respectively; for maps the function should take four arguments ak, av, bk, and bv, again returning < 0, 0, or > 0, using a and b's keys and values. Examples: Default sorting: sort([3,"A",1,"B",22]) returns [1, 3, 22, "A", "B"]. @@ -2600,67 +2600,67 @@ FUNCTIONS FOR FILTER/PUT Map without function: sort({"c":2,"a":3,"b":1}, "v") returns {"b":1,"c":2,"a":3}. Map without function: sort({"c":2,"a":3,"b":1}, "vnr") returns {"a":3,"c":2,"b":1}. - splita + 1msplita0m (class=conversion #args=2) Splits string into array with type inference. First argument is string to split; second is the separator to split on. Example: splita("3,4,5", ",") = [3,4,5] - splitax + 1msplitax0m (class=conversion #args=2) Splits string into array without type inference.
First argument is string to split; second is the separator to split on. Example: splitax("3,4,5", ",") = ["3","4","5"] - splitkv + 1msplitkv0m (class=conversion #args=3) Splits string by separators into map with type inference. First argument is string to split; second argument is pair separator; third argument is field separator. Example: splitkv("a=3,b=4,c=5", "=", ",") = {"a":3,"b":4,"c":5} - splitkvx + 1msplitkvx0m (class=conversion #args=3) Splits string by separators into map without type inference (keys and values are strings). First argument is string to split; second argument is pair separator; third argument is field separator. Example: splitkvx("a=3,b=4,c=5", "=", ",") = {"a":"3","b":"4","c":"5"} - splitnv + 1msplitnv0m (class=conversion #args=2) Splits string by separator into integer-indexed map with type inference. First argument is string to split; second argument is separator to split on. Example: splitnv("a,b,c", ",") = {"1":"a","2":"b","3":"c"} - splitnvx + 1msplitnvx0m (class=conversion #args=2) Splits string by separator into integer-indexed map without type inference (values are strings). First argument is string to split; second argument is separator to split on. Example: splitnvx("3,4,5", ",") = {"1":"3","2":"4","3":"5"} - sqrt + 1msqrt0m (class=math #args=1) Square root. - ssub + 1mssub0m (class=string #args=3) Like sub but does no regexing. No characters are special. Example: ssub("abc.def", ".", "X") gives "abcXdef" - strftime + 1mstrftime0m (class=time #args=2) Formats seconds since the epoch as timestamp. Format strings are as at https://pkg.go.dev/github.com/lestrrat-go/strftime, with the Miller-specific addition of "%1S" through "%9S" which format the seconds with 1 through 9 decimal places, respectively. ("%S" uses no decimal places.) See also https://miller.readthedocs.io/en/latest/reference-dsl-time/ for more information on the differences from the C library ("man strftime" on your system). See also strftime_local. 
Examples: strftime(1440768801.7,"%Y-%m-%dT%H:%M:%SZ") = "2015-08-28T13:33:21Z" strftime(1440768801.7,"%Y-%m-%dT%H:%M:%3SZ") = "2015-08-28T13:33:21.700Z" - strftime_local + 1mstrftime_local0m (class=time #args=2,3) Like strftime but consults the $TZ environment variable to get local time zone. Examples: strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%S %z") = "2015-08-28 16:33:21 +0300" with TZ="Asia/Istanbul" strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z") = "2015-08-28 16:33:21.700 +0300" with TZ="Asia/Istanbul" strftime_local(1440768801.7, "%Y-%m-%d %H:%M:%3S %z", "Asia/Istanbul") = "2015-08-28 16:33:21.700 +0300" - string + 1mstring0m (class=conversion #args=1) Convert int/float/bool/string/array/map to string. - strip + 1mstrip0m (class=string #args=1) Strip leading and trailing whitespace from string. - strlen + 1mstrlen0m (class=string #args=1) String length. - strptime + 1mstrptime0m (class=time #args=2) strptime: Parses timestamp as floating-point seconds since the epoch. See also strptime_local. Examples: strptime("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440768801.000000 @@ -2668,7 +2668,7 @@ FUNCTIONS FOR FILTER/PUT strptime("1970-01-01 00:00:00 -0400", "%Y-%m-%d %H:%M:%S %z") = 14400 strptime("1970-01-01 00:00:00 EET", "%Y-%m-%d %H:%M:%S %Z") = -7200 - strptime_local + 1mstrptime_local0m (class=time #args=2,3) Like strptime but consults the $TZ environment variable to get local time zone. Examples: strptime_local("2015-08-28T13:33:21Z", "%Y-%m-%dT%H:%M:%SZ") = 1440758001 with TZ="Asia/Istanbul" @@ -2676,7 +2676,7 @@ FUNCTIONS FOR FILTER/PUT strptime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S") = 1440758001 with TZ="Asia/Istanbul" strptime_local("2015-08-28 13:33:21", "%Y-%m-%d %H:%M:%S", "Asia/Istanbul") = 1440758001 - sub + 1msub0m (class=string #args=3) '$name = sub($name, "old", "new")': replace once (first match, if there are multiple matches), with support for regular expressions.
Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to sub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. Examples: sub("ababab", "ab", "XY") gives "XYabab" @@ -2685,229 +2685,229 @@ FUNCTIONS FOR FILTER/PUT sub("abcdefg", "[ce]", "X") gives "abXdefg" sub("prefix4529:suffix8567", "suffix([0-9]+)", "name\1") gives "prefix4529:name8567" - substr + 1msubstr0m (class=string #args=3) substr is an alias for substr0. See also substr1. Miller is generally 1-up with all array and string indices, but, this is a backward-compatibility issue with Miller 5 and below. Arrays are new in Miller 6; the substr function is older. - substr0 + 1msubstr00m (class=string #args=3) substr0(s,m,n) gives substring of s from 0-up position m to n inclusive. Negative indices -len .. -1 alias to 0 .. len-1. See also substr and substr1. - substr1 + 1msubstr10m (class=string #args=3) substr1(s,m,n) gives substring of s from 1-up position m to n inclusive. Negative indices -len .. -1 alias to 1 .. len. See also substr and substr0. - system + 1msystem0m (class=system #args=1) Run command string, yielding its stdout minus final carriage return. - systime + 1msystime0m (class=time #args=0) Returns the system time in floating-point seconds since the epoch. - systimeint + 1msystimeint0m (class=time #args=0) Returns the system time in integer seconds since the epoch. - tan + 1mtan0m (class=math #args=1) Trigonometric tangent. - tanh + 1mtanh0m (class=math #args=1) Hyperbolic tangent. - tolower + 1mtolower0m (class=string #args=1) Convert string to lowercase. - toupper + 1mtoupper0m (class=string #args=1) Convert string to uppercase. - truncate + 1mtruncate0m (class=string #args=2) Truncates string first argument to max length of int second argument. - typeof + 1mtypeof0m (class=typing #args=1) Convert argument to type of argument (e.g. 
"str"). For debug. - unflatten + 1munflatten0m (class=collections #args=2) Reverses flatten. Useful for nested JSON-like structures for non-JSON file formats like CSV. The first argument is a map, and the second argument is the flatten separator. See also arrayify. See "Flatten/unflatten: converting between JSON and tabular formats" at https://miller.readthedocs.io for more information. Example: unflatten({"a.b.c" : 4}, ".") is {"a": {"b": { "c": 4 }}}. - unformat + 1munformat0m (class=string #args=2) Using first argument as format string, unpacks second argument into an array of matches, with type-inference. On non-match, returns error -- use is_error() to check. Examples: unformat("{}:{}:{}", "1:2:3") gives [1, 2, 3]. unformat("{}h{}m{}s", "3h47m22s") gives [3, 47, 22]. is_error(unformat("{}h{}m{}s", "3:47:22")) gives true. - unformatx + 1munformatx0m (class=string #args=2) Same as unformat, but without type-inference. Examples: unformatx("{}:{}:{}", "1:2:3") gives ["1", "2", "3"]. unformatx("{}h{}m{}s", "3h47m22s") gives ["3", "47", "22"]. is_error(unformatx("{}h{}m{}s", "3:47:22")) gives true. - uptime + 1muptime0m (class=time #args=0) Returns the time in floating-point seconds since the current Miller program was started. - urand + 1murand0m (class=math #args=0) Floating-point numbers uniformly distributed on the unit interval. Example: Int-valued example: '$n=floor(20+urand()*11)'. - urand32 + 1murand320m (class=math #args=0) Integer uniformly distributed between 0 and 2**32-1 inclusive. - urandelement + 1murandelement0m (class=math #args=1) Random sample from the first argument, which must be a non-empty array. - urandint + 1murandint0m (class=math #args=2) Integer uniformly distributed between inclusive integer endpoints. - urandrange + 1murandrange0m (class=math #args=2) Floating-point numbers uniformly distributed on the interval [a, b). - utf8_to_latin1 + 1mutf8_to_latin10m (class=string #args=1) Tries to convert UTF-8-encoded string to Latin-1-encoded string.
If argument is array or map, recurses into it. Examples: $y = utf8_to_latin1($x) $* = utf8_to_latin1($*) - version + 1mversion0m (class=system #args=0) Returns the Miller version as a string. - ! + 1m!0m (class=boolean #args=1) Logical negation. - != + 1m!=0m (class=boolean #args=2) String/numeric inequality. Mixing number and string results in string compare. - !=~ + 1m!=~0m (class=boolean #args=2) String (left-hand side) does not match regex (right-hand side), e.g. '$name !=~ "^a.*b$"'. - % + 1m%0m (class=arithmetic #args=2) Remainder; never negative-valued (pythonic). - & + 1m&0m (class=arithmetic #args=2) Bitwise AND. - && + 1m&&0m (class=boolean #args=2) Logical AND. - * + 1m*0m (class=arithmetic #args=2) Multiplication, with integer*integer overflow to float. - ** + 1m**0m (class=arithmetic #args=2) Exponentiation. Same as pow, but as an infix operator. - + + 1m+0m (class=arithmetic #args=1,2) Addition as binary operator; unary plus operator. - - + 1m-0m (class=arithmetic #args=1,2) Subtraction as binary operator; unary negation operator. - . + 1m.0m (class=string #args=2) String concatenation. Non-strings are coerced, so you can do '"ax".98' etc. - .* + 1m.*0m (class=arithmetic #args=2) Multiplication, with integer-to-integer overflow. - .+ + 1m.+0m (class=arithmetic #args=2) Addition, with integer-to-integer overflow. - .- + 1m.-0m (class=arithmetic #args=2) Subtraction, with integer-to-integer overflow. - ./ + 1m./0m (class=arithmetic #args=2) Integer division, rounding toward zero. - / + 1m/0m (class=arithmetic #args=2) Division. Integer / integer is integer when exact, else floating-point: e.g. 6/3 is 2 but 6/4 is 1.5. - // + 1m//0m (class=arithmetic #args=2) Pythonic integer division, rounding toward negative. - < + 1m<0m (class=boolean #args=2) String/numeric less-than. Mixing number and string results in string compare. - << + 1m<<0m (class=arithmetic #args=2) Bitwise left-shift. 
- <= + 1m<=0m (class=boolean #args=2) String/numeric less-than-or-equals. Mixing number and string results in string compare. - <=> + 1m<=>0m (class=boolean #args=2) Comparator, nominally for sorting. Given a <=> b, returns <0, 0, >0 as a < b, a == b, or a > b, respectively. - == + 1m==0m (class=boolean #args=2) String/numeric equality. Mixing number and string results in string compare. - =~ + 1m=~0m (class=boolean #args=2) String (left-hand side) matches regex (right-hand side), e.g. '$name =~ "^a.*b$"'. Capture groups \1 through \9 are matched from (...) in the right-hand side, and can be used within subsequent DSL statements. See also "Regular expressions" at https://miller.readthedocs.io. Examples: With if-statement: if ($url =~ "http.*com") { ... } Without if-statement: given $line = "index ab09 file", and $line =~ "([a-z][a-z])([0-9][0-9])", then $label = "[\1:\2]", $label is "[ab:09]" - > + 1m>0m (class=boolean #args=2) String/numeric greater-than. Mixing number and string results in string compare. - >= + 1m>=0m (class=boolean #args=2) String/numeric greater-than-or-equals. Mixing number and string results in string compare. - >> + 1m>>0m (class=arithmetic #args=2) Bitwise signed right-shift. - >>> + 1m>>>0m (class=arithmetic #args=2) Bitwise unsigned right-shift. - ?: + 1m?:0m (class=boolean #args=3) Standard ternary operator. - ?? + 1m??0m (class=boolean #args=2) Absent-coalesce operator. $a ?? 1 evaluates to 1 if $a isn't defined in the current record. - ??? + 1m???0m (class=boolean #args=2) Absent/empty-coalesce operator. $a ??? 1 evaluates to 1 if $a isn't defined in the current record, or has empty value. - ^ + 1m^0m (class=arithmetic #args=2) Bitwise XOR. - ^^ + 1m^^0m (class=boolean #args=2) Logical XOR. - | + 1m|0m (class=arithmetic #args=2) Bitwise OR. - || + 1m||0m (class=boolean #args=2) Logical OR. - ~ + 1m~0m (class=arithmetic #args=1) Bitwise NOT. Beware '$y=~$x' since =~ is the regex-match operator: try '$y = ~$x'. 
-KEYWORDS FOR PUT AND FILTER - all +1mKEYWORDS FOR PUT AND FILTER0m + 1mall0m all: used in "emit1", "emit", "emitp", and "unset" as a synonym for @* - begin + 1mbegin0m begin: defines a block of statements to be executed before input records are ingested. The body statements must be wrapped in curly braces. Example: 'begin { @count = 0 }' - bool + 1mbool0m bool: declares a boolean local variable in the current curly-braced scope. Type-checking happens at assignment: 'bool b = 1' is an error. - break + 1mbreak0m break: causes execution to continue after the body of the current for/while/do-while loop. - call + 1mcall0m call: used for invoking a user-defined subroutine. Example: 'subr s(k,v) { print k . " is " . v} call s("a", $a)' - continue + 1mcontinue0m continue: causes execution to skip the remaining statements in the body of the current for/while/do-while loop. For-loop increments are still applied. - do + 1mdo0m do: with "while", introduces a do-while loop. The body statements must be wrapped in curly braces. - dump + 1mdump0m dump: prints all currently defined out-of-stream variables immediately to stdout as JSON. @@ -2926,21 +2926,21 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump >> "mytap.dat"}' Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump | "jq .[]"}' - edump + 1medump0m edump: prints all currently defined out-of-stream variables immediately to stderr as JSON. Example: mlr --from f.dat put -q '@v[NR]=$*; end { edump }' - elif + 1melif0m elif: the way Miller spells "else if". The body statements must be wrapped in curly braces. - else + 1melse0m else: terminates an if/elif/elif chain. The body statements must be wrapped in curly braces. - emit1 + 1memit10m emit1: inserts an out-of-stream variable into the output record stream. Unlike the other map variants, side-by-sides, indexing, and redirection are not supported, but you can emit any map-valued expression. 
@@ -2950,7 +2950,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - emit + 1memit0m emit: inserts an out-of-stream variable into the output record stream. Hashmap indices present in the data but not slotted by emit arguments are not output. @@ -2981,7 +2981,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - emitf + 1memitf0m emitf: inserts non-indexed out-of-stream variable(s) side-by-side into the output record stream. @@ -3009,7 +3009,7 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - emitp + 1memitp0m emitp: inserts an out-of-stream variable into the output record stream. Hashmap indices present in the data but not slotted by emitp arguments are output concatenated with ":". @@ -3039,29 +3039,29 @@ KEYWORDS FOR PUT AND FILTER Please see https://miller.readthedocs.io for more information. - end + 1mend0m end: defines a block of statements to be executed after input records are ingested. The body statements must be wrapped in curly braces. Example: 'end { emit @count }' Example: 'end { eprint "Final count is " . @count }' - eprint + 1meprint0m eprint: prints expression immediately to stderr. Example: mlr --from f.dat put -q 'eprint "The sum of x and y is ".($x+$y)' Example: mlr --from f.dat put -q 'for (k, v in $*) { eprint k . " => " . v }' Example: mlr --from f.dat put '(NR % 1000 == 0) { eprint "Checkpoint ".NR}' - eprintn + 1meprintn0m eprintn: prints expression immediately to stderr, without trailing newline. Example: mlr --from f.dat put -q 'eprintn "The sum of x and y is ".($x+$y); eprint ""' - false + 1mfalse0m false: the boolean literal value. - filter + 1mfilter0m filter: includes/excludes the record in the output record stream.
Example: mlr --from f.dat put 'filter (NR == 2 || $x > 5.4)' @@ -3072,11 +3072,11 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q '@running_sum += $x * $y; emit @running_sum' - float + 1mfloat0m float: declares a floating-point local variable in the current curly-braced scope. Type-checking happens at assignment: 'float x = 0' is an error. - for + 1mfor0m for: defines a for-loop using one of three styles. The body statements must be wrapped in curly braces. For-loop over stream record: @@ -3093,71 +3093,71 @@ KEYWORDS FOR PUT AND FILTER Example: 'for (var i = 0, var b = 1; i < 10; i += 1, b *= 2) { ... }' - func + 1mfunc0m func: used for defining a user-defined function. Example: 'func f(a,b) { return sqrt(a**2+b**2)} $d = f($x, $y)' - funct + 1mfunct0m funct: used for saying that a function argument is a user-defined function. Example: 'func g(num a, num b, funct f) :num { return f(a**2+b**2) }' - if + 1mif0m if: starts an if/elif/elif chain. The body statements must be wrapped in curly braces. - in + 1min0m in: used in for-loops over stream records or out-of-stream variables. - int + 1mint0m int: declares an integer local variable in the current curly-braced scope. Type-checking happens at assignment: 'int x = 0.0' is an error. - map + 1mmap0m map: declares a map-valued local variable in the current curly-braced scope. Type-checking happens at assignment: 'map b = 0' is an error. map b = {} is always OK. map b = a is OK or not depending on whether a is a map. - num + 1mnum0m num: declares an int/float local variable in the current curly-braced scope. Type-checking happens at assignment: 'num b = true' is an error. - print + 1mprint0m print: prints expression immediately to stdout. Example: mlr --from f.dat put -q 'print "The sum of x and y is ".($x+$y)' Example: mlr --from f.dat put -q 'for (k, v in $*) { print k . " => " .
v }' Example: mlr --from f.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}' - printn + 1mprintn0m printn: prints expression immediately to stdout, without trailing newline. Example: mlr --from f.dat put -q 'printn "."; end { print "" }' - return + 1mreturn0m return: specifies the return value from a user-defined function. Omitted return statements (including via if-branches) result in an absent-null return value, which in turn results in a skipped assignment to an LHS. - stderr + 1mstderr0m stderr: Used for tee, emit, emitf, emitp, print, and dump in place of filename to print to standard error. - stdout + 1mstdout0m stdout: Used for tee, emit, emitf, emitp, print, and dump in place of filename to print to standard output. - str + 1mstr0m str: declares a string local variable in the current curly-braced scope. Type-checking happens at assignment. - subr + 1msubr0m subr: used for defining a subroutine. Example: 'subr s(k,v) { print k . " is " . v} call s("a", $a)' - tee + 1mtee0m tee: prints the current record to specified file. This is an immediate print to the specified file (except for pprint format which of course waits until the end of the input stream to format all output). @@ -3183,10 +3183,10 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*' Example: mlr --from f.dat put -q --ojson 'tee | "gzip > /tmp/data-".$a.".gz", $*' - true + 1mtrue0m true: the boolean literal value. - unset + 1munset0m unset: clears field(s) from the current record, or an out-of-stream or local variable. Example: mlr --from f.dat put 'unset $x' @@ -3196,75 +3196,75 @@ KEYWORDS FOR PUT AND FILTER Example: mlr --from f.dat put '...; unset @sums["green"]' Example: mlr --from f.dat put '...; unset @*' - var + 1mvar0m var: declares an untyped local variable in the current curly-braced scope. Examples: 'var a=1', 'var xyz=""' - while + 1mwhile0m while: introduces a while loop, or with "do", introduces a do-while loop.
The body statements must be wrapped in curly braces. - ENV + 1mENV0m ENV: access to environment variables by name, e.g. '$home = ENV["HOME"]' - FILENAME + 1mFILENAME0m FILENAME: evaluates to the name of the current file being processed. - FILENUM + 1mFILENUM0m FILENUM: evaluates to the number of the current file being processed, starting with 1. - FNR + 1mFNR0m FNR: evaluates to the number of the current record within the current file being processed, starting with 1. Resets at the start of each file. - IFS + 1mIFS0m IFS: evaluates to the input field separator from the command line. - IPS + 1mIPS0m IPS: evaluates to the input pair separator from the command line. - IRS + 1mIRS0m IRS: evaluates to the input record separator from the command line, or to LF or CRLF from the input data if in autodetect mode (which is the default). - M_E + 1mM_E0m M_E: the mathematical constant e. - M_PI + 1mM_PI0m M_PI: the mathematical constant pi. - NF + 1mNF0m NF: evaluates to the number of fields in the current record. - NR + 1mNR0m NR: evaluates to the number of the current record over all files being processed, starting with 1. Does not reset at the start of each file. - OFS + 1mOFS0m OFS: evaluates to the output field separator from the command line. - OPS + 1mOPS0m OPS: evaluates to the output pair separator from the command line. - ORS + 1mORS0m ORS: evaluates to the output record separator from the command line, or to LF or CRLF from the input data if in autodetect mode (which is the default). -AUTHOR +1mAUTHOR0m Miller is written by John Kerl