From 64afd267740863653869ab6135491dd3487ffd73 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Wed, 22 Jul 2020 09:36:03 -0700
Subject: [PATCH 01/20] Add syntax for SIMD instructions

---
 document/core/syntax/instructions.rst | 152 ++++++++++++++++++++++++--
 document/core/util/macros.def         |  22 ++++
 2 files changed, 165 insertions(+), 9 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 4a21904f7..ad8381577 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -35,6 +35,16 @@ The following sections group instructions into a number of different categories.
 .. _syntax-fbinop:
 .. _syntax-ftestop:
 .. _syntax-frelop:
+.. _syntax-vunop:
+.. _syntax-vbinop:
+.. _syntax-vternop:
+.. _syntax-vtestop:
+.. _syntax-vshiftop:
+.. _syntax-viunop:
+.. _syntax-vibinop:
+.. _syntax-vsatbinop:
+.. _syntax-vfunop:
+.. _syntax-vfbinop:
 .. _syntax-instr-numeric:
 
 Numeric Instructions
@@ -47,15 +57,27 @@ These operations closely match respective operations available in hardware.
    \begin{array}{llcl}
    \production{width} & \X{nn}, \X{mm} &::=&
      \K{32} ~|~ \K{64} \\
+   \production{simdwidth} & \X{sss} &::=&
+     \K{128} \\
    \production{signedness} & \sx &::=&
      \K{u} ~|~ \K{s} \\
+   \production{ishape} & \X{ixx} &::=&
+     \K{i8x16} ~|~ \K{i16x8} ~|~ \K{i32x4} ~|~ \K{i64x2} \\
+   \production{fshape} & \X{fxx} &::=&
+     \K{f32x4} ~|~ \K{f64x2} \\
+   \production{vshape} & \X{vxx} &::=&
+     \X{ixx} ~|~ \X{fxx} \\
    \production{instruction} & \instr &::=&
      \K{i}\X{nn}\K{.}\CONST~\xref{syntax/values}{syntax-int}{\iX{\X{nn}}} ~|~
-     \K{f}\X{nn}\K{.}\CONST~\xref{syntax/values}{syntax-float}{\fX{\X{nn}}} \\&&|&
+     \K{f}\X{nn}\K{.}\CONST~\xref{syntax/values}{syntax-float}{\fX{\X{nn}}} ~|~
+     \K{v}\X{sss}\K{.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{sss}}} \\&&|&
      \K{i}\X{nn}\K{.}\iunop ~|~
-     \K{f}\X{nn}\K{.}\funop \\&&|&
+     \K{f}\X{nn}\K{.}\funop ~|~
+     \K{v}\X{sss}\K{.}\vunop \\&&|&
      \K{i}\X{nn}\K{.}\ibinop ~|~
-     \K{f}\X{nn}\K{.}\fbinop \\&&|&
+     \K{f}\X{nn}\K{.}\fbinop ~|~
+     \K{v}\X{sss}\K{.}\vbinop \\&&|&
+     \K{v}\X{sss}\K{.}\vternop \\&&|&
      \K{i}\X{nn}\K{.}\itestop \\&&|&
      \K{i}\X{nn}\K{.}\irelop ~|~
      \K{f}\X{nn}\K{.}\frelop \\&&|&
@@ -71,6 +93,49 @@ These operations closely match respective operations available in hardware.
      \K{f}\X{nn}\K{.}\CONVERT\K{\_i}\X{mm}\K{\_}\sx \\&&|&
      \K{i}\X{nn}\K{.}\REINTERPRET\K{\_f}\X{nn} ~|~
      \K{f}\X{nn}\K{.}\REINTERPRET\K{\_i}\X{nn} \\&&|&
+     \K{v8x16.}\SHUFFLE ~|~ \K{v8x16.}\SWIZZLE \\&&|&
+     \X{vxx}\K{.}\SPLAT \\&&|&
+     \K{i8x16.}\EXTRACTLANE\K{\_}\sx ~|~
+     \K{i16x8.}\EXTRACTLANE\K{\_}\sx \\&&|&
+     \K{i32x4.}\EXTRACTLANE ~|~
+     \K{i64x2.}\EXTRACTLANE \\&&|&
+     \X{fxx}\K{.}\EXTRACTLANE \\&&|&
+     \X{vxx}\K{.}\REPLACELANE \\&&|&
+     \X{ixx}\K{.}\irelop \\&&|&
+     \X{fxx}\K{.}\frelop \\&&|&
+     \K{i8x16.}\viunop ~|~
+     \K{i16x8.}\viunop ~|~
+     \K{i32x4.}\viunop \\&&|&
+     \K{i64x2.}\NEG \\&&|&
+     \X{fxx.}\vfunop \\&&|&
+     \K{i8x16.}\vtestop ~|~
+     \K{i16x8.}\vtestop ~|~
+     \K{i32x4.}\vtestop \\&&|&
+     \K{i8x16.}\BITMASK ~|~
+     \K{i16x8.}\BITMASK ~|~
+     \K{i32x4.}\BITMASK \\&&|&
+     \K{i8x16.}\NARROW\K{\_i16x8\_}\sx ~|~
+     \K{i16x8.}\NARROW\K{\_i32x4\_}\sx \\&&|&
+     \K{i16x8.}\WIDEN\K{\_low}\K{\_i8x16\_}\sx ~|~
+     \K{i32x4.}\WIDEN\K{\_low}\K{\_i16x8\_}\sx \\&&|&
+     \K{i16x8.}\WIDEN\K{\_high}\K{\_i8x16\_}\sx ~|~
+     \K{i32x4.}\WIDEN\K{\_high}\K{\_i16x8\_}\sx \\&&|&
+     \X{ixx}\K{.}\vshiftop \\&&|&
+     \K{i8x16.}\vibinop ~|~
+     \K{i16x8.}\vibinop ~|~
+     \K{i32x4.}\vibinop \\&&|&
+     \K{i64x2.}\ADD ~|~
+     \K{i64x2.}\SUB \\&&|&
+     \K{i8x16.}\vsatbinop ~|~
+     \K{i16x8.}\vsatbinop \\&&|&
+     \K{i16x8.}\K{mul} ~|~
+     \K{i32x4.}\K{mul} ~|~
+     \K{i64x2.}\K{mul} \\&&|&
+     \K{i8x16.}\AVGRU ~|~
+     \K{i16x8.}\AVGRU \\&&|&
+     \X{fxx.}\vfbinop \\&&|&
+     \K{i32x4.}\TRUNC\K{\_sat\_f32x4\_}\sx ~|~
+     \K{f32x4.}\CONVERT\K{\_i32x4\_}\sx \\&&|&
      \dots \\
    \production{integer unary operator} & \iunop &::=&
      \K{clz} ~|~
@@ -105,8 +170,20 @@ These operations closely match respective operations available in hardware.
      \K{min} ~|~
      \K{max} ~|~
      \K{copysign} \\
+   \production{SIMD unary operator} & \vunop &::=&
+     \K{not} \\
+   \production{SIMD binary operator} & \vbinop &::=&
+     \K{and} ~|~
+     \K{andnot} ~|~
+     \K{or} ~|~
+     \K{xor} \\
+   \production{SIMD unary operator} & \vternop &::=&
+     \K{bitselect} \\
    \production{integer test operator} & \itestop &::=&
      \K{eqz} \\
+   \production{SIMD test operator} & \vtestop &::=&
+     \K{any\_true} ~|~
+     \K{all\_true} \\
    \production{integer relational operator} & \irelop &::=&
      \K{eq} ~|~
      \K{ne} ~|~
@@ -121,6 +198,32 @@ These operations closely match respective operations available in hardware.
      \K{gt} ~|~
      \K{le} ~|~
      \K{ge} \\
+   \production{SIMD integer shift operator} & \vshiftop &::=&
+     \K{shl} ~|~
+     \K{shr\_s} ~|~
+     \K{shr\_u} \\
+   \production{SIMD integer unary operator} & \viunop &::=&
+     \K{abs} ~|~
+     \K{neg} \\
+   \production{SIMD integer binary operator} & \vibinop &::=&
+     \K{add} ~|~
+     \K{sub} ~|~
+     \K{min\_}\sx ~|~
+     \K{max\_}\sx \\
+   \production{SIMD integer saturating binary operator} & \vsatbinop &::=&
+     \K{add\_sat\_}\sx ~|~
+     \K{sub\_sat\_}\sx \\
+   \production{SIMD floating-point unary operator} & \vfunop &::=&
+     \K{abs} ~|~
+     \K{neg} ~|~
+     \K{sqrt} \\
+   \production{SIMD floating-point binary operator} & \vfbinop &::=&
+     \K{add} ~|~
+     \K{sub} ~|~
+     \K{mul} ~|~
+     \K{div} ~|~
+     \K{min} ~|~
+     \K{max} \\
    \end{array}
 
 Numeric instructions are divided by :ref:`value type <syntax-valtype>`.
@@ -134,15 +237,25 @@ For each type, several subcategories can be distinguished:
 
 * *Tests*: consume one operand of the respective type and produce a Boolean integer result.
 
-* *Comparisons*: consume two operands of the respective type and produce a Boolean integer result.
+* *Comparisons*: consume two operands of the respective type and produce a Boolean integer result or a result of the respective type.
 
 * *Conversions*: consume a value of one type and produce a result of another
   (the source type of the conversion is the one after the ":math:`\K{\_}`").
 
+.. todo::
+  Do these subcategories have to cover every instruction? E.g. simd shifts don't fit anywhere here, since they take 128-bit int and a 32-bit int.
+
 Some integer instructions come in two flavors,
 where a signedness annotation |sx| distinguishes whether the operands are to be :ref:`interpreted <aux-signed>` as :ref:`unsigned <syntax-uint>` or :ref:`signed <syntax-sint>` integers.
 For the other integer instructions, the use of two's complement for the signed interpretation means that they behave the same regardless of signedness.
 
+Instructions that operate on |V128| operands have a naming convention that
+determines how those operands will be interpreted. An instruction beginning with :math:`\K{i32x4}`
+will interpret its operands as four |i32|, packed side-by-side into a |i128|.
+Similarly, and instruction beginning with :math:`\K{f64x2}` interprets its operands as two |f64|, packed side-by-side into a |i128|.
+
+.. todo::
+  write up runtime interpretation for the lane shapes
 
 Conventions
 ...........
@@ -154,9 +267,22 @@ Occasionally, it is convenient to group operators together according to the foll
    \production{unary operator} & \unop &::=&
      \iunop ~|~
      \funop ~|~
+     \vunop ~|~
+     \viunop ~|~
+     \vfunop ~|~
      \EXTEND{N}\K{\_s} \\
-   \production{binary operator} & \binop &::=& \ibinop ~|~ \fbinop \\
-   \production{test operator} & \testop &::=& \itestop \\
+   \production{binary operator} & \binop &::=&
+   \ibinop ~|~
+   \fbinop ~|~
+   \vbinop ~|~
+   \vibinop ~|~
+   \vsatbinop ~|~
+   \vfbinop ~|~
+   \AVGRU
+   \\
+   \production{test operator} & \testop &::=&
+   \itestop ~|~
+   \vtestop \\
    \production{relational operator} & \relop &::=& \irelop ~|~ \frelop \\
    \production{conversion operator} & \cvtop &::=&
      \WRAP ~|~
@@ -166,7 +292,9 @@ Occasionally, it is convenient to group operators together according to the foll
      \CONVERT ~|~
      \DEMOTE ~|~
      \PROMOTE ~|~
-     \REINTERPRET \\
+     \REINTERPRET ~|~
+     \NARROW ~|~
+     \WIDEN \\
    \end{array}
 
 
@@ -235,15 +363,21 @@ Instructions in this group are concerned with linear :ref:`memory <syntax-mem>`.
    \production{instruction} & \instr &::=&
      \dots \\&&|&
      \K{i}\X{nn}\K{.}\LOAD~\memarg ~|~
-     \K{f}\X{nn}\K{.}\LOAD~\memarg \\&&|&
+     \K{f}\X{nn}\K{.}\LOAD~\memarg ~|~
+     \K{v}\X{sss}\K{.}\LOAD~\memarg \\&&|&
      \K{i}\X{nn}\K{.}\STORE~\memarg ~|~
-     \K{f}\X{nn}\K{.}\STORE~\memarg \\&&|&
+     \K{f}\X{nn}\K{.}\STORE~\memarg ~|~
+     \K{v}\X{sss}\K{.}\STORE~\memarg \\&&|&
      \K{i}\X{nn}\K{.}\LOAD\K{8\_}\sx~\memarg ~|~
      \K{i}\X{nn}\K{.}\LOAD\K{16\_}\sx~\memarg ~|~
      \K{i64.}\LOAD\K{32\_}\sx~\memarg \\&&|&
      \K{i}\X{nn}\K{.}\STORE\K{8}~\memarg ~|~
      \K{i}\X{nn}\K{.}\STORE\K{16}~\memarg ~|~
      \K{i64.}\STORE\K{32}~\memarg \\&&|&
+     \K{i16x8.}\LOAD\K{8x8}\_\sx ~|~
+     \K{i32x4.}\LOAD\K{16x4}\_\sx ~|~
+     \K{i64x2.}\LOAD\K{32x2}\_\sx \\&&|&
+     \K{v}\X{ixx}\K{.}\LOAD\K{\_splat} \\&&|&
      \MEMORYSIZE \\&&|&
      \MEMORYGROW \\
    \end{array}
diff --git a/document/core/util/macros.def b/document/core/util/macros.def
index 8bd8a0caa..869644386 100644
--- a/document/core/util/macros.def
+++ b/document/core/util/macros.def
@@ -129,6 +129,7 @@
 .. |sX#1| mathdef:: {\X{s#1}}
 .. |iX#1| mathdef:: {\X{i#1}}
 .. |fX#1| mathdef:: {\X{f#1}}
+.. |vX#1| mathdef:: {\X{v#1}}
 
 .. |uN| mathdef:: \xref{syntax/values}{syntax-int}{\X{u}N}
 .. |uM| mathdef:: \xref{syntax/values}{syntax-int}{\X{u}M}
@@ -378,6 +379,16 @@
 .. |DEMOTE| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{demote}}
 .. |REINTERPRET| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{reinterpret}}
 
+.. |SHUFFLE| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{shuffle}}
+.. |SWIZZLE| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{swizzle}}
+.. |SPLAT| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{splat}}
+.. |EXTRACTLANE| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{extract\_lane}}
+.. |REPLACELANE| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{replace\_lane}}
+.. |BITMASK| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{bitmask}}
+.. |NARROW| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{narrow}}
+.. |WIDEN| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{widen}}
+.. |AVGRU| mathdef:: \xref{syntax/instructions}{syntax-instr-numeric}{\K{avgr\_u}}
+
 
 .. Instructions, non-terminals
 
@@ -397,6 +408,17 @@
 .. |ftestop| mathdef:: \xref{syntax/instructions}{syntax-ftestop}{\X{ftestop}}
 .. |frelop| mathdef:: \xref{syntax/instructions}{syntax-frelop}{\X{frelop}}
 
+.. |vunop| mathdef:: \xref{syntax/instructions}{syntax-vunop}{\X{vunop}}
+.. |vbinop| mathdef:: \xref{syntax/instructions}{syntax-vbinop}{\X{vbinop}}
+.. |vternop| mathdef:: \xref{syntax/instructions}{syntax-vternop}{\X{vternop}}
+.. |vtestop| mathdef:: \xref{syntax/instructions}{syntax-vtestop}{\X{vtestop}}
+.. |vshiftop| mathdef:: \xref{syntax/instructions}{syntax-vshiftop}{\X{vshiftop}}
+.. |viunop| mathdef:: \xref{syntax/instructions}{syntax-viunop}{\X{viunop}}
+.. |vibinop| mathdef:: \xref{syntax/instructions}{syntax-vibinop}{\X{vibinop}}
+.. |vsatbinop| mathdef:: \xref{syntax/instructions}{syntax-vsatbinop}{\X{vsatbinop}}
+.. |vfunop| mathdef:: \xref{syntax/instructions}{syntax-vfunop}{\X{vfunop}}
+.. |vfbinop| mathdef:: \xref{syntax/instructions}{syntax-vfbinop}{\X{vfbinop}}
+
 .. |sx| mathdef:: \xref{syntax/instructions}{syntax-sx}{\X{sx}}
 .. |memarg| mathdef:: \xref{syntax/instructions}{syntax-memarg}{\X{memarg}}
 

From 6e3d12e9cfc4342ce2034c832dc69d1e284184fc Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Fri, 24 Jul 2020 12:58:21 -0700
Subject: [PATCH 02/20] Fix typo and indents in instructions.rst

---
 document/core/syntax/instructions.rst | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index ad8381577..55cf13aaf 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -177,7 +177,7 @@ These operations closely match respective operations available in hardware.
      \K{andnot} ~|~
      \K{or} ~|~
      \K{xor} \\
-   \production{SIMD unary operator} & \vternop &::=&
+   \production{SIMD ternary operator} & \vternop &::=&
      \K{bitselect} \\
    \production{integer test operator} & \itestop &::=&
      \K{eqz} \\
@@ -272,17 +272,16 @@ Occasionally, it is convenient to group operators together according to the foll
      \vfunop ~|~
      \EXTEND{N}\K{\_s} \\
    \production{binary operator} & \binop &::=&
-   \ibinop ~|~
-   \fbinop ~|~
-   \vbinop ~|~
-   \vibinop ~|~
-   \vsatbinop ~|~
-   \vfbinop ~|~
-   \AVGRU
-   \\
+     \ibinop ~|~
+     \fbinop ~|~
+     \vbinop ~|~
+     \vibinop ~|~
+     \vsatbinop ~|~
+     \vfbinop ~|~
+     \AVGRU \\
    \production{test operator} & \testop &::=&
-   \itestop ~|~
-   \vtestop \\
+     \itestop ~|~
+     \vtestop \\
    \production{relational operator} & \relop &::=& \irelop ~|~ \frelop \\
    \production{conversion operator} & \cvtop &::=&
      \WRAP ~|~

From 78313df10c575039c9c9be0995bacc6a91243a9f Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Mon, 27 Jul 2020 15:09:50 -0700
Subject: [PATCH 03/20] Move SIMD instructions into their own section

---
 document/core/syntax/instructions.rst | 272 ++++++++++++++------------
 1 file changed, 148 insertions(+), 124 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 55cf13aaf..960b6e927 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -35,16 +35,6 @@ The following sections group instructions into a number of different categories.
 .. _syntax-fbinop:
 .. _syntax-ftestop:
 .. _syntax-frelop:
-.. _syntax-vunop:
-.. _syntax-vbinop:
-.. _syntax-vternop:
-.. _syntax-vtestop:
-.. _syntax-vshiftop:
-.. _syntax-viunop:
-.. _syntax-vibinop:
-.. _syntax-vsatbinop:
-.. _syntax-vfunop:
-.. _syntax-vfbinop:
 .. _syntax-instr-numeric:
 
 Numeric Instructions
@@ -53,31 +43,22 @@ Numeric Instructions
 Numeric instructions provide basic operations over numeric :ref:`values <syntax-value>` of specific :ref:`type <syntax-valtype>`.
 These operations closely match respective operations available in hardware.
 
+.. todo::
+   add a note about how 128-bit SIMD instructions are different from numeric instructions and are described in the section below.
+
 .. math::
    \begin{array}{llcl}
    \production{width} & \X{nn}, \X{mm} &::=&
      \K{32} ~|~ \K{64} \\
-   \production{simdwidth} & \X{sss} &::=&
-     \K{128} \\
    \production{signedness} & \sx &::=&
      \K{u} ~|~ \K{s} \\
-   \production{ishape} & \X{ixx} &::=&
-     \K{i8x16} ~|~ \K{i16x8} ~|~ \K{i32x4} ~|~ \K{i64x2} \\
-   \production{fshape} & \X{fxx} &::=&
-     \K{f32x4} ~|~ \K{f64x2} \\
-   \production{vshape} & \X{vxx} &::=&
-     \X{ixx} ~|~ \X{fxx} \\
    \production{instruction} & \instr &::=&
      \K{i}\X{nn}\K{.}\CONST~\xref{syntax/values}{syntax-int}{\iX{\X{nn}}} ~|~
-     \K{f}\X{nn}\K{.}\CONST~\xref{syntax/values}{syntax-float}{\fX{\X{nn}}} ~|~
-     \K{v}\X{sss}\K{.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{sss}}} \\&&|&
+     \K{f}\X{nn}\K{.}\CONST~\xref{syntax/values}{syntax-float}{\fX{\X{nn}}} \\&&|&
      \K{i}\X{nn}\K{.}\iunop ~|~
-     \K{f}\X{nn}\K{.}\funop ~|~
-     \K{v}\X{sss}\K{.}\vunop \\&&|&
+     \K{f}\X{nn}\K{.}\funop \\&&|&
      \K{i}\X{nn}\K{.}\ibinop ~|~
-     \K{f}\X{nn}\K{.}\fbinop ~|~
-     \K{v}\X{sss}\K{.}\vbinop \\&&|&
-     \K{v}\X{sss}\K{.}\vternop \\&&|&
+     \K{f}\X{nn}\K{.}\fbinop \\&&|&
      \K{i}\X{nn}\K{.}\itestop \\&&|&
      \K{i}\X{nn}\K{.}\irelop ~|~
      \K{f}\X{nn}\K{.}\frelop \\&&|&
@@ -93,49 +74,6 @@ These operations closely match respective operations available in hardware.
      \K{f}\X{nn}\K{.}\CONVERT\K{\_i}\X{mm}\K{\_}\sx \\&&|&
      \K{i}\X{nn}\K{.}\REINTERPRET\K{\_f}\X{nn} ~|~
      \K{f}\X{nn}\K{.}\REINTERPRET\K{\_i}\X{nn} \\&&|&
-     \K{v8x16.}\SHUFFLE ~|~ \K{v8x16.}\SWIZZLE \\&&|&
-     \X{vxx}\K{.}\SPLAT \\&&|&
-     \K{i8x16.}\EXTRACTLANE\K{\_}\sx ~|~
-     \K{i16x8.}\EXTRACTLANE\K{\_}\sx \\&&|&
-     \K{i32x4.}\EXTRACTLANE ~|~
-     \K{i64x2.}\EXTRACTLANE \\&&|&
-     \X{fxx}\K{.}\EXTRACTLANE \\&&|&
-     \X{vxx}\K{.}\REPLACELANE \\&&|&
-     \X{ixx}\K{.}\irelop \\&&|&
-     \X{fxx}\K{.}\frelop \\&&|&
-     \K{i8x16.}\viunop ~|~
-     \K{i16x8.}\viunop ~|~
-     \K{i32x4.}\viunop \\&&|&
-     \K{i64x2.}\NEG \\&&|&
-     \X{fxx.}\vfunop \\&&|&
-     \K{i8x16.}\vtestop ~|~
-     \K{i16x8.}\vtestop ~|~
-     \K{i32x4.}\vtestop \\&&|&
-     \K{i8x16.}\BITMASK ~|~
-     \K{i16x8.}\BITMASK ~|~
-     \K{i32x4.}\BITMASK \\&&|&
-     \K{i8x16.}\NARROW\K{\_i16x8\_}\sx ~|~
-     \K{i16x8.}\NARROW\K{\_i32x4\_}\sx \\&&|&
-     \K{i16x8.}\WIDEN\K{\_low}\K{\_i8x16\_}\sx ~|~
-     \K{i32x4.}\WIDEN\K{\_low}\K{\_i16x8\_}\sx \\&&|&
-     \K{i16x8.}\WIDEN\K{\_high}\K{\_i8x16\_}\sx ~|~
-     \K{i32x4.}\WIDEN\K{\_high}\K{\_i16x8\_}\sx \\&&|&
-     \X{ixx}\K{.}\vshiftop \\&&|&
-     \K{i8x16.}\vibinop ~|~
-     \K{i16x8.}\vibinop ~|~
-     \K{i32x4.}\vibinop \\&&|&
-     \K{i64x2.}\ADD ~|~
-     \K{i64x2.}\SUB \\&&|&
-     \K{i8x16.}\vsatbinop ~|~
-     \K{i16x8.}\vsatbinop \\&&|&
-     \K{i16x8.}\K{mul} ~|~
-     \K{i32x4.}\K{mul} ~|~
-     \K{i64x2.}\K{mul} \\&&|&
-     \K{i8x16.}\AVGRU ~|~
-     \K{i16x8.}\AVGRU \\&&|&
-     \X{fxx.}\vfbinop \\&&|&
-     \K{i32x4.}\TRUNC\K{\_sat\_f32x4\_}\sx ~|~
-     \K{f32x4.}\CONVERT\K{\_i32x4\_}\sx \\&&|&
      \dots \\
    \production{integer unary operator} & \iunop &::=&
      \K{clz} ~|~
@@ -170,20 +108,8 @@ These operations closely match respective operations available in hardware.
      \K{min} ~|~
      \K{max} ~|~
      \K{copysign} \\
-   \production{SIMD unary operator} & \vunop &::=&
-     \K{not} \\
-   \production{SIMD binary operator} & \vbinop &::=&
-     \K{and} ~|~
-     \K{andnot} ~|~
-     \K{or} ~|~
-     \K{xor} \\
-   \production{SIMD ternary operator} & \vternop &::=&
-     \K{bitselect} \\
    \production{integer test operator} & \itestop &::=&
      \K{eqz} \\
-   \production{SIMD test operator} & \vtestop &::=&
-     \K{any\_true} ~|~
-     \K{all\_true} \\
    \production{integer relational operator} & \irelop &::=&
      \K{eq} ~|~
      \K{ne} ~|~
@@ -198,32 +124,6 @@ These operations closely match respective operations available in hardware.
      \K{gt} ~|~
      \K{le} ~|~
      \K{ge} \\
-   \production{SIMD integer shift operator} & \vshiftop &::=&
-     \K{shl} ~|~
-     \K{shr\_s} ~|~
-     \K{shr\_u} \\
-   \production{SIMD integer unary operator} & \viunop &::=&
-     \K{abs} ~|~
-     \K{neg} \\
-   \production{SIMD integer binary operator} & \vibinop &::=&
-     \K{add} ~|~
-     \K{sub} ~|~
-     \K{min\_}\sx ~|~
-     \K{max\_}\sx \\
-   \production{SIMD integer saturating binary operator} & \vsatbinop &::=&
-     \K{add\_sat\_}\sx ~|~
-     \K{sub\_sat\_}\sx \\
-   \production{SIMD floating-point unary operator} & \vfunop &::=&
-     \K{abs} ~|~
-     \K{neg} ~|~
-     \K{sqrt} \\
-   \production{SIMD floating-point binary operator} & \vfbinop &::=&
-     \K{add} ~|~
-     \K{sub} ~|~
-     \K{mul} ~|~
-     \K{div} ~|~
-     \K{min} ~|~
-     \K{max} \\
    \end{array}
 
 Numeric instructions are divided by :ref:`value type <syntax-valtype>`.
@@ -237,7 +137,7 @@ For each type, several subcategories can be distinguished:
 
 * *Tests*: consume one operand of the respective type and produce a Boolean integer result.
 
-* *Comparisons*: consume two operands of the respective type and produce a Boolean integer result or a result of the respective type.
+* *Comparisons*: consume two operands of the respective type and produce a Boolean integer result.
 
 * *Conversions*: consume a value of one type and produce a result of another
   (the source type of the conversion is the one after the ":math:`\K{\_}`").
@@ -267,21 +167,9 @@ Occasionally, it is convenient to group operators together according to the foll
    \production{unary operator} & \unop &::=&
      \iunop ~|~
      \funop ~|~
-     \vunop ~|~
-     \viunop ~|~
-     \vfunop ~|~
      \EXTEND{N}\K{\_s} \\
-   \production{binary operator} & \binop &::=&
-     \ibinop ~|~
-     \fbinop ~|~
-     \vbinop ~|~
-     \vibinop ~|~
-     \vsatbinop ~|~
-     \vfbinop ~|~
-     \AVGRU \\
-   \production{test operator} & \testop &::=&
-     \itestop ~|~
-     \vtestop \\
+   \production{binary operator} & \binop &::=& \ibinop ~|~ \fbinop \\
+   \production{test operator} & \testop &::=& \itestop \\
    \production{relational operator} & \relop &::=& \irelop ~|~ \frelop \\
    \production{conversion operator} & \cvtop &::=&
      \WRAP ~|~
@@ -291,9 +179,7 @@ Occasionally, it is convenient to group operators together according to the foll
      \CONVERT ~|~
      \DEMOTE ~|~
      \PROMOTE ~|~
-     \REINTERPRET ~|~
-     \NARROW ~|~
-     \WIDEN \\
+     \REINTERPRET \\
    \end{array}
 
 
@@ -301,6 +187,144 @@ Occasionally, it is convenient to group operators together according to the foll
    pair: abstract syntax; instruction
 .. _syntax-instr-parametric:
 
+.. index:: ! simd instruction, fixed-width simd, value, value type
+   pair: abstract syntax; instruction
+.. _syntax-vunop:
+.. _syntax-vbinop:
+.. _syntax-vternop:
+.. _syntax-vtestop:
+.. _syntax-vshiftop:
+.. _syntax-viunop:
+.. _syntax-vibinop:
+.. _syntax-vsatbinop:
+.. _syntax-vfunop:
+.. _syntax-vfbinop:
+.. _syntax-instr-simd:
+
+SIMD Instructions
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. math::
+   \begin{array}{llcl}
+   \production{simdwidth} & \X{sss} &::=&
+     \K{128} \\
+   \production{signedness} & \sx &::=&
+     \K{u} ~|~ \K{s} \\
+   \production{ishape} & \X{ixx} &::=&
+     \K{i8x16} ~|~ \K{i16x8} ~|~ \K{i32x4} ~|~ \K{i64x2} \\
+   \production{fshape} & \X{fxx} &::=&
+     \K{f32x4} ~|~ \K{f64x2} \\
+   \production{vshape} & \X{vxx} &::=&
+     \X{ixx} ~|~ \X{fxx} \\
+   \production{instruction} & \instr &::=&
+     \K{v}\X{sss}\K{.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{sss}}} \\&&|&
+     \K{v}\X{sss}\K{.}\vunop \\&&|&
+     \K{v}\X{sss}\K{.}\vbinop \\&&|&
+     \K{v}\X{sss}\K{.}\vternop \\&&|&
+     \K{v8x16.}\SHUFFLE ~|~ \K{v8x16.}\SWIZZLE \\&&|&
+     \X{vxx}\K{.}\SPLAT \\&&|&
+     \K{i8x16.}\EXTRACTLANE\K{\_}\sx ~|~
+     \K{i16x8.}\EXTRACTLANE\K{\_}\sx \\&&|&
+     \K{i32x4.}\EXTRACTLANE ~|~
+     \K{i64x2.}\EXTRACTLANE \\&&|&
+     \X{fxx}\K{.}\EXTRACTLANE \\&&|&
+     \X{vxx}\K{.}\REPLACELANE \\&&|&
+     \X{ixx}\K{.}\irelop \\&&|&
+     \X{fxx}\K{.}\frelop \\&&|&
+     \K{i8x16.}\viunop ~|~
+     \K{i16x8.}\viunop ~|~
+     \K{i32x4.}\viunop \\&&|&
+     \K{i64x2.}\NEG \\&&|&
+     \X{fxx.}\vfunop \\&&|&
+     \K{i8x16.}\vtestop ~|~
+     \K{i16x8.}\vtestop ~|~
+     \K{i32x4.}\vtestop \\&&|&
+     \K{i8x16.}\BITMASK ~|~
+     \K{i16x8.}\BITMASK ~|~
+     \K{i32x4.}\BITMASK \\&&|&
+     \K{i8x16.}\NARROW\K{\_i16x8\_}\sx ~|~
+     \K{i16x8.}\NARROW\K{\_i32x4\_}\sx \\&&|&
+     \K{i16x8.}\WIDEN\K{\_low}\K{\_i8x16\_}\sx ~|~
+     \K{i32x4.}\WIDEN\K{\_low}\K{\_i16x8\_}\sx \\&&|&
+     \K{i16x8.}\WIDEN\K{\_high}\K{\_i8x16\_}\sx ~|~
+     \K{i32x4.}\WIDEN\K{\_high}\K{\_i16x8\_}\sx \\&&|&
+     \X{ixx}\K{.}\vshiftop \\&&|&
+     \K{i8x16.}\vibinop ~|~
+     \K{i16x8.}\vibinop ~|~
+     \K{i32x4.}\vibinop \\&&|&
+     \K{i64x2.}\ADD ~|~
+     \K{i64x2.}\SUB \\&&|&
+     \K{i8x16.}\vsatbinop ~|~
+     \K{i16x8.}\vsatbinop \\&&|&
+     \K{i16x8.}\K{mul} ~|~
+     \K{i32x4.}\K{mul} ~|~
+     \K{i64x2.}\K{mul} \\&&|&
+     \K{i8x16.}\AVGRU ~|~
+     \K{i16x8.}\AVGRU \\&&|&
+     \X{fxx.}\vfbinop \\&&|&
+     \K{i32x4.}\TRUNC\K{\_sat\_f32x4\_}\sx ~|~
+     \K{f32x4.}\CONVERT\K{\_i32x4\_}\sx \\&&|&
+     \dots \\
+   \production{SIMD unary operator} & \vunop &::=&
+     \K{not} \\
+   \production{SIMD binary operator} & \vbinop &::=&
+     \K{and} ~|~
+     \K{andnot} ~|~
+     \K{or} ~|~
+     \K{xor} \\
+   \production{SIMD ternary operator} & \vternop &::=&
+     \K{bitselect} \\
+   \production{integer test operator} & \itestop &::=&
+     \K{eqz} \\
+   \production{SIMD test operator} & \vtestop &::=&
+     \K{any\_true} ~|~
+     \K{all\_true} \\
+   \production{integer relational operator} & \irelop &::=&
+     \K{eq} ~|~
+     \K{ne} ~|~
+     \K{lt\_}\sx ~|~
+     \K{gt\_}\sx ~|~
+     \K{le\_}\sx ~|~
+     \K{ge\_}\sx \\
+   \production{floating-point relational operator} & \frelop &::=&
+     \K{eq} ~|~
+     \K{ne} ~|~
+     \K{lt} ~|~
+     \K{gt} ~|~
+     \K{le} ~|~
+     \K{ge} \\
+   \production{SIMD integer shift operator} & \vshiftop &::=&
+     \K{shl} ~|~
+     \K{shr\_s} ~|~
+     \K{shr\_u} \\
+   \production{SIMD integer unary operator} & \viunop &::=&
+     \K{abs} ~|~
+     \K{neg} \\
+   \production{SIMD integer binary operator} & \vibinop &::=&
+     \K{add} ~|~
+     \K{sub} ~|~
+     \K{min\_}\sx ~|~
+     \K{max\_}\sx \\
+   \production{SIMD integer saturating binary operator} & \vsatbinop &::=&
+     \K{add\_sat\_}\sx ~|~
+     \K{sub\_sat\_}\sx \\
+   \production{SIMD floating-point unary operator} & \vfunop &::=&
+     \K{abs} ~|~
+     \K{neg} ~|~
+     \K{sqrt} \\
+   \production{SIMD floating-point binary operator} & \vfbinop &::=&
+     \K{add} ~|~
+     \K{sub} ~|~
+     \K{mul} ~|~
+     \K{div} ~|~
+     \K{min} ~|~
+     \K{max} \\
+   \end{array}
+
+
+.. todo::
+   describe SIMD Instructions
+
 Parametric Instructions
 ~~~~~~~~~~~~~~~~~~~~~~~
 

From 9ba8a4e9ec166993592a0b73d8dbf6020ddd8ad7 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Mon, 27 Jul 2020 15:22:08 -0700
Subject: [PATCH 04/20] Remove simdwidth (sss) production, use v128; use nnn as the value

---
 document/core/syntax/instructions.rst | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 960b6e927..0994befef 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -206,8 +206,6 @@ SIMD Instructions
 
 .. math::
    \begin{array}{llcl}
-   \production{simdwidth} & \X{sss} &::=&
-     \K{128} \\
    \production{signedness} & \sx &::=&
      \K{u} ~|~ \K{s} \\
    \production{ishape} & \X{ixx} &::=&
@@ -217,10 +215,10 @@ SIMD Instructions
    \production{vshape} & \X{vxx} &::=&
      \X{ixx} ~|~ \X{fxx} \\
    \production{instruction} & \instr &::=&
-     \K{v}\X{sss}\K{.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{sss}}} \\&&|&
-     \K{v}\X{sss}\K{.}\vunop \\&&|&
-     \K{v}\X{sss}\K{.}\vbinop \\&&|&
-     \K{v}\X{sss}\K{.}\vternop \\&&|&
+     \K{v128.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{nnn}}} \\&&|&
+     \K{v128.}\vunop \\&&|&
+     \K{v128.}\vbinop \\&&|&
+     \K{v128.}\vternop \\&&|&
      \K{v8x16.}\SHUFFLE ~|~ \K{v8x16.}\SWIZZLE \\&&|&
      \X{vxx}\K{.}\SPLAT \\&&|&
      \K{i8x16.}\EXTRACTLANE\K{\_}\sx ~|~
@@ -274,8 +272,6 @@ SIMD Instructions
      \K{xor} \\
    \production{SIMD ternary operator} & \vternop &::=&
      \K{bitselect} \\
-   \production{integer test operator} & \itestop &::=&
-     \K{eqz} \\
    \production{SIMD test operator} & \vtestop &::=&
      \K{any\_true} ~|~
      \K{all\_true} \\
@@ -387,10 +383,10 @@ Instructions in this group are concerned with linear :ref:`memory <syntax-mem>`.
      \dots \\&&|&
      \K{i}\X{nn}\K{.}\LOAD~\memarg ~|~
      \K{f}\X{nn}\K{.}\LOAD~\memarg ~|~
-     \K{v}\X{sss}\K{.}\LOAD~\memarg \\&&|&
+     \K{v128.}\LOAD~\memarg \\&&|&
      \K{i}\X{nn}\K{.}\STORE~\memarg ~|~
      \K{f}\X{nn}\K{.}\STORE~\memarg ~|~
-     \K{v}\X{sss}\K{.}\STORE~\memarg \\&&|&
+     \K{v128.}\STORE~\memarg \\&&|&
      \K{i}\X{nn}\K{.}\LOAD\K{8\_}\sx~\memarg ~|~
      \K{i}\X{nn}\K{.}\LOAD\K{16\_}\sx~\memarg ~|~
      \K{i64.}\LOAD\K{32\_}\sx~\memarg \\&&|&
@@ -400,7 +396,7 @@ Instructions in this group are concerned with linear :ref:`memory <syntax-mem>`.
      \K{i16x8.}\LOAD\K{8x8}\_\sx ~|~
      \K{i32x4.}\LOAD\K{16x4}\_\sx ~|~
      \K{i64x2.}\LOAD\K{32x2}\_\sx \\&&|&
-     \K{v}\X{ixx}\K{.}\LOAD\K{\_splat} \\&&|&
+     \K{v128.}\LOAD\K{\_splat} \\&&|&
      \MEMORYSIZE \\&&|&
      \MEMORYGROW \\
    \end{array}

From 2c08cdb618e80d58484963c84738ccbe1dd5dedc Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Mon, 27 Jul 2020 15:39:39 -0700
Subject: [PATCH 05/20] Add description to SIMD instructions

---
 document/core/syntax/instructions.rst | 92 +++++++++++++++++----------
 document/core/util/macros.def         |  6 +-
 2 files changed, 65 insertions(+), 33 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 0994befef..45442de49 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -43,9 +43,6 @@ Numeric Instructions
 Numeric instructions provide basic operations over numeric :ref:`values <syntax-value>` of specific :ref:`type <syntax-valtype>`.
 These operations closely match respective operations available in hardware.
 
-.. todo::
-   add a note about how 128-bit SIMD instructions are different from numeric instructions and are described in the section below.
-
 .. math::
    \begin{array}{llcl}
    \production{width} & \X{nn}, \X{mm} &::=&
@@ -142,20 +139,10 @@ For each type, several subcategories can be distinguished:
 * *Conversions*: consume a value of one type and produce a result of another
   (the source type of the conversion is the one after the ":math:`\K{\_}`").
 
-.. todo::
-  Do these subcategories have to cover every instruction? E.g. simd shifts don't fit anywhere here, since they take 128-bit int and a 32-bit int.
-
 Some integer instructions come in two flavors,
 where a signedness annotation |sx| distinguishes whether the operands are to be :ref:`interpreted <aux-signed>` as :ref:`unsigned <syntax-uint>` or :ref:`signed <syntax-sint>` integers.
 For the other integer instructions, the use of two's complement for the signed interpretation means that they behave the same regardless of signedness.
 
-Instructions that operate on |V128| operands have a naming convention that
-determines how those operands will be interpreted. An instruction beginning with :math:`\K{i32x4}`
-will interpret its operands as four |i32|, packed side-by-side into a |i128|.
-Similarly, and instruction beginning with :math:`\K{f64x2}` interprets its operands as two |f64|, packed side-by-side into a |i128|.
-
-.. todo::
-  write up runtime interpretation for the lane shapes
 
 Conventions
 ...........
@@ -183,15 +170,13 @@ Occasionally, it is convenient to group operators together according to the foll
    \end{array}
 
 
-.. index:: ! parametric instruction, value type
-   pair: abstract syntax; instruction
-.. _syntax-instr-parametric:
-
 .. index:: ! simd instruction, fixed-width simd, value, value type
    pair: abstract syntax; instruction
 .. _syntax-vunop:
 .. _syntax-vbinop:
-.. _syntax-vternop:
+.. _syntax-vsunop:
+.. _syntax-vsbinop:
+.. _syntax-vsternop:
 .. _syntax-vtestop:
 .. _syntax-vshiftop:
 .. _syntax-viunop:
@@ -199,26 +184,30 @@ Occasionally, it is convenient to group operators together according to the foll
 .. _syntax-vsatbinop:
 .. _syntax-vfunop:
 .. _syntax-vfbinop:
+.. _syntax-virelop:
+.. _syntax-vfrelop:
 .. _syntax-instr-simd:
 
 SIMD Instructions
 ~~~~~~~~~~~~~~~~~~~~~~~
 
+SIMD instructions provide basic operations over :ref:`values <syntax-value>` of type |V128|.
+
 .. math::
    \begin{array}{llcl}
-   \production{signedness} & \sx &::=&
-     \K{u} ~|~ \K{s} \\
    \production{ishape} & \X{ixx} &::=&
      \K{i8x16} ~|~ \K{i16x8} ~|~ \K{i32x4} ~|~ \K{i64x2} \\
    \production{fshape} & \X{fxx} &::=&
      \K{f32x4} ~|~ \K{f64x2} \\
    \production{vshape} & \X{vxx} &::=&
      \X{ixx} ~|~ \X{fxx} \\
+   \production{signedness} & \sx &::=&
+     \K{u} ~|~ \K{s} \\
    \production{instruction} & \instr &::=&
      \K{v128.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{nnn}}} \\&&|&
-     \K{v128.}\vunop \\&&|&
-     \K{v128.}\vbinop \\&&|&
-     \K{v128.}\vternop \\&&|&
+     \K{v128.}\vsunop \\&&|&
+     \K{v128.}\vsbinop \\&&|&
+     \K{v128.}\vsternop \\&&|&
      \K{v8x16.}\SHUFFLE ~|~ \K{v8x16.}\SWIZZLE \\&&|&
      \X{vxx}\K{.}\SPLAT \\&&|&
      \K{i8x16.}\EXTRACTLANE\K{\_}\sx ~|~
@@ -227,8 +216,8 @@ SIMD Instructions
      \K{i64x2.}\EXTRACTLANE \\&&|&
      \X{fxx}\K{.}\EXTRACTLANE \\&&|&
      \X{vxx}\K{.}\REPLACELANE \\&&|&
-     \X{ixx}\K{.}\irelop \\&&|&
-     \X{fxx}\K{.}\frelop \\&&|&
+     \X{ixx}\K{.}\virelop \\&&|&
+     \X{fxx}\K{.}\vfrelop \\&&|&
      \K{i8x16.}\viunop ~|~
      \K{i16x8.}\viunop ~|~
      \K{i32x4.}\viunop \\&&|&
@@ -263,26 +252,26 @@ SIMD Instructions
      \K{i32x4.}\TRUNC\K{\_sat\_f32x4\_}\sx ~|~
      \K{f32x4.}\CONVERT\K{\_i32x4\_}\sx \\&&|&
      \dots \\
-   \production{SIMD unary operator} & \vunop &::=&
+   \production{SIMD unary operator} & \vsunop &::=&
      \K{not} \\
-   \production{SIMD binary operator} & \vbinop &::=&
+   \production{SIMD binary operator} & \vsbinop &::=&
      \K{and} ~|~
      \K{andnot} ~|~
      \K{or} ~|~
      \K{xor} \\
-   \production{SIMD ternary operator} & \vternop &::=&
+   \production{SIMD ternary operator} & \vsternop &::=&
      \K{bitselect} \\
    \production{SIMD test operator} & \vtestop &::=&
      \K{any\_true} ~|~
      \K{all\_true} \\
-   \production{integer relational operator} & \irelop &::=&
+   \production{SIMD integer relational operator} & \virelop &::=&
      \K{eq} ~|~
      \K{ne} ~|~
      \K{lt\_}\sx ~|~
      \K{gt\_}\sx ~|~
      \K{le\_}\sx ~|~
      \K{ge\_}\sx \\
-   \production{floating-point relational operator} & \frelop &::=&
+   \production{SIMD floating-point relational operator} & \vfrelop &::=&
      \K{eq} ~|~
      \K{ne} ~|~
      \K{lt} ~|~
@@ -317,9 +306,48 @@ SIMD Instructions
      \K{max} \\
    \end{array}
 
+SIMD instructions have a naming convention that
+determines how their operands will be interpreted. An instruction beginning with :math:`\K{i32x4}`
+will interpret its operands as four |i32|, packed side-by-side into a |i128|.
+This prefix, :math:`\K{i32x4}`, is known as the *shape* of the type, and is made up of the underlying element type, :math:`\K{i32}`, and the number of elements or *lanes*, :math:`\K{4}`. Operations are performed lane-wise on each element.
+
+An instruction that begins with :math:`\K{v128}` is not concerned about the underlying element type, and treats the entire |V128| as a |i128|.
 
 .. todo::
-   describe SIMD Instructions
+  write up runtime interpretation for the lane shapes
+
+SIMD instructions can be grouped into several subcategories:
+
+* *Constants*: return a static constant.
+
+* *Unary Operations*: consume one |V128| operand and produce one |V128| result.
+
+* *Binary Operations*: consume two |V128| operands and produce one |V128| result.
+
+* *Tests*: consume one |V128| operand and produce a Boolean integer result.
+
+* *Shifts*: consume a |v128| operand and a |i32| operand, producing one |V128| result.
+
+* *Extract lanes*: consume a |V128| operand and an immediate byte specifying the lane index and produce a result of the element type.
+
+* *Replace lanes*: consume a |V128| operand, an immediate byte specifying the lane index, and a value of the element type, and produce a |V128| result.
+
+* *Conversions/Splats*: consume a value of the integer or floating-point type and produce a |V128| result of a specified shape.
+
+.. todo::
+   should comparisons be called out in a separate subcategory? they are essentially the same as binary operations
+   should (v128) converions be called out in a separate subcategory? they have the same signature as unary opreations.
+
+.. * *Conversions*: consume a |V128| operand and produce a |V128| result. Lane-wise conversion from the source element type to the destination element type (the source type of the conversion is the one after the first ":math:`\K{\_}`").
+
+
+Some SIMD instructions have a signedness annotation |sx| which distinguishes whether the elements in the operands are to be :ref:`interpreted <aux-signed>` as :ref:`unsigned <syntax-uint>` or :ref:`signed <syntax-sint>` integers.
+For the other SIMD instructions, the use of two's complement for the signed interpretation means that they behave the same regardless of signedness.
+
+
+.. index:: ! parametric instruction, value type
+   pair: abstract syntax; instruction
+.. _syntax-instr-parametric:
 
 Parametric Instructions
 ~~~~~~~~~~~~~~~~~~~~~~~
@@ -396,7 +424,7 @@ Instructions in this group are concerned with linear :ref:`memory <syntax-mem>`.
      \K{i16x8.}\LOAD\K{8x8}\_\sx ~|~
      \K{i32x4.}\LOAD\K{16x4}\_\sx ~|~
      \K{i64x2.}\LOAD\K{32x2}\_\sx \\&&|&
-     \K{v128.}\LOAD\K{\_splat} \\&&|&
+     \X{vxx}\K{.}\LOAD\K{\_splat} \\&&|&
      \MEMORYSIZE \\&&|&
      \MEMORYGROW \\
    \end{array}
diff --git a/document/core/util/macros.def b/document/core/util/macros.def
index 869644386..6e2a5e5c7 100644
--- a/document/core/util/macros.def
+++ b/document/core/util/macros.def
@@ -410,7 +410,9 @@
 
 .. |vunop| mathdef:: \xref{syntax/instructions}{syntax-vunop}{\X{vunop}}
 .. |vbinop| mathdef:: \xref{syntax/instructions}{syntax-vbinop}{\X{vbinop}}
-.. |vternop| mathdef:: \xref{syntax/instructions}{syntax-vternop}{\X{vternop}}
+.. |vsunop| mathdef:: \xref{syntax/instructions}{syntax-vsunop}{\X{vsunop}}
+.. |vsbinop| mathdef:: \xref{syntax/instructions}{syntax-vsbinop}{\X{vsbinop}}
+.. |vsternop| mathdef:: \xref{syntax/instructions}{syntax-vsternop}{\X{vsternop}}
 .. |vtestop| mathdef:: \xref{syntax/instructions}{syntax-vtestop}{\X{vtestop}}
 .. |vshiftop| mathdef:: \xref{syntax/instructions}{syntax-vshiftop}{\X{vshiftop}}
 .. |viunop| mathdef:: \xref{syntax/instructions}{syntax-viunop}{\X{viunop}}
@@ -418,6 +420,8 @@
 .. |vsatbinop| mathdef:: \xref{syntax/instructions}{syntax-vsatbinop}{\X{vsatbinop}}
 .. |vfunop| mathdef:: \xref{syntax/instructions}{syntax-vfunop}{\X{vfunop}}
 .. |vfbinop| mathdef:: \xref{syntax/instructions}{syntax-vfbinop}{\X{vfbinop}}
+.. |virelop| mathdef:: \xref{syntax/instructions}{syntax-virelop}{\X{virelop}}
+.. |vfrelop| mathdef:: \xref{syntax/instructions}{syntax-vfrelop}{\X{vfrelop}}
 
 .. |sx| mathdef:: \xref{syntax/instructions}{syntax-sx}{\X{sx}}
 .. |memarg| mathdef:: \xref{syntax/instructions}{syntax-memarg}{\X{memarg}}

From 8a95b1d8b02a68fd7a068f95ee4a8bc496e5aafa Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Wed, 29 Jul 2020 14:07:38 -0700
Subject: [PATCH 06/20] Add ternary subcategory

---
 document/core/syntax/instructions.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 45442de49..5f88564e0 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -324,6 +324,8 @@ SIMD instructions can be grouped into several subcategories:
 
 * *Binary Operations*: consume two |V128| operands and produce one |V128| result.
 
+* *Ternary Operations*: consume three |V128| operands and produce one |V128| result.
+
 * *Tests*: consume one |V128| operand and produce a Boolean integer result.
 
 * *Shifts*: consume a |v128| operand and a |i32| operand, producing one |V128| result.

From 1a8af6d8911a13966a9ac30b57a5cf245b3d617f Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Wed, 29 Jul 2020 14:19:33 -0700
Subject: [PATCH 07/20] Add paragraph on load splat and load extends

---
 document/core/syntax/instructions.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 5f88564e0..e27d80196 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -436,6 +436,8 @@ They all take a *memory immediate* |memarg| that contains an address *offset* an
 Integer loads and stores can optionally specify a *storage size* that is smaller than the :ref:`bit width <syntax-valtype>` of the respective value type.
 In the case of loads, a sign extension mode |sx| is then required to select appropriate behavior.
 
+SIMD loads can specify a shape that is half the :ref:`bit width <syntax-valtype>` of |V128|. Each lane is half its usual size, and the sign extension mode |sx| then specifies how the smaller lane is extended to the larger lane. SIMD loads can be annotated with *splat*, to indicate that only a single lane of the respective shape is loaded, and the result is duplicated to all other lanes.
+
 The static address offset is added to the dynamic address operand, yielding a 33 bit *effective address* that is the zero-based index at which the memory is accessed.
 All values are read and written in |LittleEndian|_ byte order.
 A :ref:`trap <trap>` results if any of the accessed memory bytes lies outside the address range implied by the memory's current size.

From de59417eb28bc90ea801259ac185e6120fd095b3 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Wed, 29 Jul 2020 14:25:52 -0700
Subject: [PATCH 08/20] Rename Conversions to Splats

---
 document/core/syntax/instructions.rst | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index e27d80196..28d9bce12 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -330,19 +330,12 @@ SIMD instructions can be grouped into several subcategories:
 
 * *Shifts*: consume a |v128| operand and a |i32| operand, producing one |V128| result.
 
+* *Splats*: consume a value of the integer or floating-point type and produce a |V128| result of a specified shape.
+
 * *Extract lanes*: consume a |V128| operand and an immediate byte specifying the lane index and produce a result of the element type.
 
 * *Replace lanes*: consume a |V128| operand, an immediate byte specifying the lane index, and a value of the element type, and produce a |V128| result.
 
-* *Conversions/Splats*: consume a value of the integer or floating-point type and produce a |V128| result of a specified shape.
-
-.. todo::
-   should comparisons be called out in a separate subcategory? they are essentially the same as binary operations
-   should (v128) converions be called out in a separate subcategory? they have the same signature as unary opreations.
-
-.. * *Conversions*: consume a |V128| operand and produce a |V128| result. Lane-wise conversion from the source element type to the destination element type (the source type of the conversion is the one after the first ":math:`\K{\_}`").
-
-
 Some SIMD instructions have a signedness annotation |sx| which distinguishes whether the elements in the operands are to be :ref:`interpreted <aux-signed>` as :ref:`unsigned <syntax-uint>` or :ref:`signed <syntax-sint>` integers.
 For the other SIMD instructions, the use of two's complement for the signed interpretation means that they behave the same regardless of signedness.
 

From 3e9eab7f5c7d76814cf0def05bb04acda7820504 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <ngzhian@gmail.com>
Date: Wed, 5 Aug 2020 13:43:53 -0700
Subject: [PATCH 09/20] Update SIMD instruction naming convention description

Co-authored-by: Andreas Rossberg <rossberg@mpi-sws.org>
---
 document/core/syntax/instructions.rst | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 28d9bce12..a3431e0a2 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -306,10 +306,16 @@ SIMD instructions provide basic operations over :ref:`values <syntax-value>` of
      \K{max} \\
    \end{array}
 
-SIMD instructions have a naming convention that
-determines how their operands will be interpreted. An instruction beginning with :math:`\K{i32x4}`
-will interpret its operands as four |i32|, packed side-by-side into a |i128|.
-This prefix, :math:`\K{i32x4}`, is known as the *shape* of the type, and is made up of the underlying element type, :math:`\K{i32}`, and the number of elements or *lanes*, :math:`\K{4}`. Operations are performed lane-wise on each element.
+SIMD instructions have a naming convention involving a prefix that
+determines how their operands will be interpreted.
+This prefix describes the *shape* of the operand,
+written :math:`t\K{x}N`, and consisting of a packed numeric type :math:`t` and the number of *lanes* :math:`N` of that type.
+Operations are performed point-wise on the values of each lane.
+
+.. note::
+   For example, the shape :math:`\K{i32x4}` interprets the operand
+as four |i32| values, packed into an |i128|.
+   The bitwidth of the numeric type :math:`t` times :math:`N` always is 128.
 
 An instruction that begins with :math:`\K{v128}` is not concerned about the underlying element type, and treats the entire |V128| as a |i128|.
 

From eebf2b8552b13ecee7ac0e626403bf6396f7e310 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <ngzhian@gmail.com>
Date: Wed, 5 Aug 2020 13:44:15 -0700
Subject: [PATCH 10/20] Update SIMD instruction prefix paragraph

Co-authored-by: Andreas Rossberg <rossberg@mpi-sws.org>
---
 document/core/syntax/instructions.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index a3431e0a2..6fe827bd0 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -317,7 +317,7 @@ Operations are performed point-wise on the values of each lane.
 as four |i32| values, packed into an |i128|.
    The bitwidth of the numeric type :math:`t` times :math:`N` always is 128.
 
-An instruction that begins with :math:`\K{v128}` is not concerned about the underlying element type, and treats the entire |V128| as a |i128|.
+Instructions prefixed with :math:`\K{v128}` do not involve a specific interpretation, and treat the |V128| as an |i128| value or a vector of 128 individual bits.
 
 .. todo::
   write up runtime interpretation for the lane shapes

From e77dad3d950ad4dda30c4688b868011b3506b1db Mon Sep 17 00:00:00 2001
From: Ng Zhi An <ngzhian@gmail.com>
Date: Wed, 5 Aug 2020 13:45:30 -0700
Subject: [PATCH 11/20] Update SIMD instruction subcategories description

Co-authored-by: Andreas Rossberg <rossberg@mpi-sws.org>
---
 document/core/syntax/instructions.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 6fe827bd0..0f21071dd 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -336,11 +336,11 @@ SIMD instructions can be grouped into several subcategories:
 
 * *Shifts*: consume a |v128| operand and a |i32| operand, producing one |V128| result.
 
-* *Splats*: consume a value of the integer or floating-point type and produce a |V128| result of a specified shape.
+* *Splats*: consume a value of numeric type and produce a |V128| result of a specified shape.
 
-* *Extract lanes*: consume a |V128| operand and an immediate byte specifying the lane index and produce a result of the element type.
+* *Extract lanes*: consume a |V128| operand and return the numeric value in a given lane.
 
-* *Replace lanes*: consume a |V128| operand, an immediate byte specifying the lane index, and a value of the element type, and produce a |V128| result.
+* *Replace lanes*: consume a |V128| operand and a numeric value for a given lane, and produce a |V128| result.
 
 Some SIMD instructions have a signedness annotation |sx| which distinguishes whether the elements in the operands are to be :ref:`interpreted <aux-signed>` as :ref:`unsigned <syntax-uint>` or :ref:`signed <syntax-sint>` integers.
 For the other SIMD instructions, the use of two's complement for the signed interpretation means that they behave the same regardless of signedness.

From d1a762d3c16309382467fb6336cd5a8974b8066a Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Wed, 5 Aug 2020 13:47:44 -0700
Subject: [PATCH 12/20] Fix underline, remove dup sx definition, add dots to
 extend instr

---
 document/core/syntax/instructions.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 0f21071dd..b54816e92 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -189,7 +189,7 @@ Occasionally, it is convenient to group operators together according to the foll
 .. _syntax-instr-simd:
 
 SIMD Instructions
-~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~
 
 SIMD instructions provide basic operations over :ref:`values <syntax-value>` of type |V128|.
 
@@ -201,9 +201,8 @@ SIMD instructions provide basic operations over :ref:`values <syntax-value>` of
      \K{f32x4} ~|~ \K{f64x2} \\
    \production{vshape} & \X{vxx} &::=&
      \X{ixx} ~|~ \X{fxx} \\
-   \production{signedness} & \sx &::=&
-     \K{u} ~|~ \K{s} \\
    \production{instruction} & \instr &::=&
+     \dots \\&&|&
      \K{v128.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{nnn}}} \\&&|&
      \K{v128.}\vsunop \\&&|&
      \K{v128.}\vsbinop \\&&|&

From cbb383ee5e41e58a43c75544c33c86eab514554c Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Wed, 5 Aug 2020 14:39:27 -0700
Subject: [PATCH 13/20] Add viminmaxop

---
 document/core/syntax/instructions.rst | 13 +++++++------
 document/core/util/macros.def         |  1 +
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index b54816e92..4c1d25f15 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -181,6 +181,7 @@ Occasionally, it is convenient to group operators together according to the foll
 .. _syntax-vshiftop:
 .. _syntax-viunop:
 .. _syntax-vibinop:
+.. _syntax-viminmaxop:
 .. _syntax-vsatbinop:
 .. _syntax-vfunop:
 .. _syntax-vfbinop:
@@ -235,11 +236,10 @@ SIMD instructions provide basic operations over :ref:`values <syntax-value>` of
      \K{i16x8.}\WIDEN\K{\_high}\K{\_i8x16\_}\sx ~|~
      \K{i32x4.}\WIDEN\K{\_high}\K{\_i16x8\_}\sx \\&&|&
      \X{ixx}\K{.}\vshiftop \\&&|&
-     \K{i8x16.}\vibinop ~|~
-     \K{i16x8.}\vibinop ~|~
-     \K{i32x4.}\vibinop \\&&|&
-     \K{i64x2.}\ADD ~|~
-     \K{i64x2.}\SUB \\&&|&
+     \X{ixx}\K{.}\vibinop \\&&|&
+     \K{i8x16.}\viminmaxop ~|~
+     \K{i16x8.}\viminmaxop ~|~
+     \K{i32x4.}\viminmaxop \\&&|&
      \K{i8x16.}\vsatbinop ~|~
      \K{i16x8.}\vsatbinop \\&&|&
      \K{i16x8.}\K{mul} ~|~
@@ -286,7 +286,8 @@ SIMD instructions provide basic operations over :ref:`values <syntax-value>` of
      \K{neg} \\
    \production{SIMD integer binary operator} & \vibinop &::=&
      \K{add} ~|~
-     \K{sub} ~|~
+     \K{sub} \\
+   \production{SIMD integer binary min/max operator} & \viminmaxop &::=&
      \K{min\_}\sx ~|~
      \K{max\_}\sx \\
    \production{SIMD integer saturating binary operator} & \vsatbinop &::=&
diff --git a/document/core/util/macros.def b/document/core/util/macros.def
index 6e2a5e5c7..9e1a7a561 100644
--- a/document/core/util/macros.def
+++ b/document/core/util/macros.def
@@ -417,6 +417,7 @@
 .. |vshiftop| mathdef:: \xref{syntax/instructions}{syntax-vshiftop}{\X{vshiftop}}
 .. |viunop| mathdef:: \xref{syntax/instructions}{syntax-viunop}{\X{viunop}}
 .. |vibinop| mathdef:: \xref{syntax/instructions}{syntax-vibinop}{\X{vibinop}}
+.. |viminmaxop| mathdef:: \xref{syntax/instructions}{syntax-viminmaxop}{\X{viminmaxop}}
 .. |vsatbinop| mathdef:: \xref{syntax/instructions}{syntax-vsatbinop}{\X{vsatbinop}}
 .. |vfunop| mathdef:: \xref{syntax/instructions}{syntax-vfunop}{\X{vfunop}}
 .. |vfbinop| mathdef:: \xref{syntax/instructions}{syntax-vfbinop}{\X{vfbinop}}

From 2aec40c6aac5e85ea3d0496ad6b0dc720f033483 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Wed, 5 Aug 2020 14:50:55 -0700
Subject: [PATCH 14/20] Fix note formatting

---
 document/core/syntax/instructions.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 4c1d25f15..059361eae 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -314,7 +314,7 @@ Operations are performed point-wise on the values of each lane.
 
 .. note::
    For example, the shape :math:`\K{i32x4}` interprets the operand
-as four |i32| values, packed into an |i128|.
+   as four |i32| values, packed into an |i128|.
    The bitwidth of the numeric type :math:`t` times :math:`N` always is 128.
 
 Instructions prefixed with :math:`\K{v128}` do not involve a specific interpretation, and treat the |V128| as an |i128| value or a vector of 128 individual bits.

From 18049c7a942cc0b3565cc1e78f5c5d14ffd7f611 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Thu, 6 Aug 2020 16:21:51 -0700
Subject: [PATCH 15/20] Introduce laneidx production (== byte), add immediates
 to instructions

---
 document/core/syntax/instructions.rst | 16 +++++++++-------
 document/core/util/macros.def         |  1 +
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 059361eae..66c87e468 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -172,6 +172,7 @@ Occasionally, it is convenient to group operators together according to the foll
 
 .. index:: ! simd instruction, fixed-width simd, value, value type
    pair: abstract syntax; instruction
+.. _syntax-laneidx:
 .. _syntax-vunop:
 .. _syntax-vbinop:
 .. _syntax-vsunop:
@@ -202,20 +203,21 @@ SIMD instructions provide basic operations over :ref:`values <syntax-value>` of
      \K{f32x4} ~|~ \K{f64x2} \\
    \production{vshape} & \X{vxx} &::=&
      \X{ixx} ~|~ \X{fxx} \\
+   \production{lane index} & \laneidx &::=& \byte \\
    \production{instruction} & \instr &::=&
      \dots \\&&|&
      \K{v128.}\CONST~\xref{syntax/values}{syntax-simd}{\vX{\X{nnn}}} \\&&|&
      \K{v128.}\vsunop \\&&|&
      \K{v128.}\vsbinop \\&&|&
      \K{v128.}\vsternop \\&&|&
-     \K{v8x16.}\SHUFFLE ~|~ \K{v8x16.}\SWIZZLE \\&&|&
+     \K{v8x16.}\SHUFFLE~\laneidx^{16} ~|~ \K{v8x16.}\SWIZZLE \\&&|&
      \X{vxx}\K{.}\SPLAT \\&&|&
-     \K{i8x16.}\EXTRACTLANE\K{\_}\sx ~|~
-     \K{i16x8.}\EXTRACTLANE\K{\_}\sx \\&&|&
-     \K{i32x4.}\EXTRACTLANE ~|~
-     \K{i64x2.}\EXTRACTLANE \\&&|&
-     \X{fxx}\K{.}\EXTRACTLANE \\&&|&
-     \X{vxx}\K{.}\REPLACELANE \\&&|&
+     \K{i8x16.}\EXTRACTLANE\K{\_}\sx~\laneidx ~|~
+     \K{i16x8.}\EXTRACTLANE\K{\_}\sx~\laneidx \\&&|&
+     \K{i32x4.}\EXTRACTLANE~\laneidx ~|~
+     \K{i64x2.}\EXTRACTLANE~\laneidx \\&&|&
+     \X{fxx}\K{.}\EXTRACTLANE~\laneidx \\&&|&
+     \X{vxx}\K{.}\REPLACELANE~\laneidx \\&&|&
      \X{ixx}\K{.}\virelop \\&&|&
      \X{fxx}\K{.}\vfrelop \\&&|&
      \K{i8x16.}\viunop ~|~
diff --git a/document/core/util/macros.def b/document/core/util/macros.def
index 9e1a7a561..91353a151 100644
--- a/document/core/util/macros.def
+++ b/document/core/util/macros.def
@@ -408,6 +408,7 @@
 .. |ftestop| mathdef:: \xref{syntax/instructions}{syntax-ftestop}{\X{ftestop}}
 .. |frelop| mathdef:: \xref{syntax/instructions}{syntax-frelop}{\X{frelop}}
 
+.. |laneidx| mathdef:: \xref{syntax/instructions}{syntax-laneidx}{\X{laneidx}}
 .. |vunop| mathdef:: \xref{syntax/instructions}{syntax-vunop}{\X{vunop}}
 .. |vbinop| mathdef:: \xref{syntax/instructions}{syntax-vbinop}{\X{vbinop}}
 .. |vsunop| mathdef:: \xref{syntax/instructions}{syntax-vsunop}{\X{vsunop}}

From e38b3b98c8154c6fdfa310862da85f70488754e0 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Thu, 6 Aug 2020 16:51:16 -0700
Subject: [PATCH 16/20] Add memarg to SIMD memory instructions

---
 document/core/syntax/instructions.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 66c87e468..55aec64e6 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -424,10 +424,10 @@ Instructions in this group are concerned with linear :ref:`memory <syntax-mem>`.
      \K{i}\X{nn}\K{.}\STORE\K{8}~\memarg ~|~
      \K{i}\X{nn}\K{.}\STORE\K{16}~\memarg ~|~
      \K{i64.}\STORE\K{32}~\memarg \\&&|&
-     \K{i16x8.}\LOAD\K{8x8}\_\sx ~|~
-     \K{i32x4.}\LOAD\K{16x4}\_\sx ~|~
-     \K{i64x2.}\LOAD\K{32x2}\_\sx \\&&|&
-     \X{vxx}\K{.}\LOAD\K{\_splat} \\&&|&
+     \K{i16x8.}\LOAD\K{8x8}\_\sx~\memarg ~|~
+     \K{i32x4.}\LOAD\K{16x4}\_\sx~\memarg ~|~
+     \K{i64x2.}\LOAD\K{32x2}\_\sx~\memarg \\&&|&
+     \X{vxx}\K{.}\LOAD\K{\_splat}~\memarg \\&&|&
      \MEMORYSIZE \\&&|&
      \MEMORYGROW \\
    \end{array}

From 82bb9693eec55e11542c377512c7f28c8404381f Mon Sep 17 00:00:00 2001
From: Ng Zhi An <ngzhian@gmail.com>
Date: Mon, 10 Aug 2020 09:37:31 -0700
Subject: [PATCH 17/20] Alter description of SIMD splats

Co-authored-by: Andreas Rossberg <rossberg@mpi-sws.org>
---
 document/core/syntax/instructions.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 55aec64e6..2b9b2d159 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -437,7 +437,7 @@ They all take a *memory immediate* |memarg| that contains an address *offset* an
 Integer loads and stores can optionally specify a *storage size* that is smaller than the :ref:`bit width <syntax-valtype>` of the respective value type.
 In the case of loads, a sign extension mode |sx| is then required to select appropriate behavior.
 
-SIMD loads can specify a shape that is half the :ref:`bit width <syntax-valtype>` of |V128|. Each lane is half its usual size, and the sign extension mode |sx| then specifies how the smaller lane is extended to the larger lane. SIMD loads can be annotated with *splat*, to indicate that only a single lane of the respective shape is loaded, and the result is duplicated to all other lanes.
+SIMD loads can specify a shape that is half the :ref:`bit width <syntax-valtype>` of |V128|. Each lane is half its usual size, and the sign extension mode |sx| then specifies how the smaller lane is extended to the larger lane. Alternatively, SIMD loads can perform a *splat*, such that only a single lane of the respective shape is loaded, and the result is duplicated to all other lanes.
 
 The static address offset is added to the dynamic address operand, yielding a 33 bit *effective address* that is the zero-based index at which the memory is accessed.
 All values are read and written in |LittleEndian|_ byte order.

From 6d1419925a3875397bc0c66bc34a00881e85b24e Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Mon, 24 Aug 2020 18:14:34 -0700
Subject: [PATCH 18/20] Update names of load splat, load extends, swizzle, and
 shuffle

---
 document/core/syntax/instructions.rst | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 2b9b2d159..78d61be45 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -210,7 +210,7 @@ SIMD instructions provide basic operations over :ref:`values <syntax-value>` of
      \K{v128.}\vsunop \\&&|&
      \K{v128.}\vsbinop \\&&|&
      \K{v128.}\vsternop \\&&|&
-     \K{v8x16.}\SHUFFLE~\laneidx^{16} ~|~ \K{v8x16.}\SWIZZLE \\&&|&
+     \K{i8x16.}\SHUFFLE~\laneidx^{16} ~|~ \K{i8x16.}\SWIZZLE \\&&|&
      \X{vxx}\K{.}\SPLAT \\&&|&
      \K{i8x16.}\EXTRACTLANE\K{\_}\sx~\laneidx ~|~
      \K{i16x8.}\EXTRACTLANE\K{\_}\sx~\laneidx \\&&|&
@@ -424,10 +424,13 @@ Instructions in this group are concerned with linear :ref:`memory <syntax-mem>`.
      \K{i}\X{nn}\K{.}\STORE\K{8}~\memarg ~|~
      \K{i}\X{nn}\K{.}\STORE\K{16}~\memarg ~|~
      \K{i64.}\STORE\K{32}~\memarg \\&&|&
-     \K{i16x8.}\LOAD\K{8x8}\_\sx~\memarg ~|~
-     \K{i32x4.}\LOAD\K{16x4}\_\sx~\memarg ~|~
-     \K{i64x2.}\LOAD\K{32x2}\_\sx~\memarg \\&&|&
-     \X{vxx}\K{.}\LOAD\K{\_splat}~\memarg \\&&|&
+     \K{v128.}\LOAD\K{8x8}\_\sx~\memarg ~|~
+     \K{v128.}\LOAD\K{16x4}\_\sx~\memarg ~|~
+     \K{v128.}\LOAD\K{32x2}\_\sx~\memarg \\&&|&
+     \K{v128.}\LOAD\K{8\_splat}~\memarg ~|~
+     \K{v128.}\LOAD\K{16\_splat}~\memarg \\&&|&
+     \K{v128.}\LOAD\K{32\_splat}~\memarg ~|~
+     \K{v128.}\LOAD\K{64\_splat}~\memarg \\&&|&
      \MEMORYSIZE \\&&|&
      \MEMORYGROW \\
    \end{array}
@@ -437,7 +440,7 @@ They all take a *memory immediate* |memarg| that contains an address *offset* an
 Integer loads and stores can optionally specify a *storage size* that is smaller than the :ref:`bit width <syntax-valtype>` of the respective value type.
 In the case of loads, a sign extension mode |sx| is then required to select appropriate behavior.
 
-SIMD loads can specify a shape that is half the :ref:`bit width <syntax-valtype>` of |V128|. Each lane is half its usual size, and the sign extension mode |sx| then specifies how the smaller lane is extended to the larger lane. Alternatively, SIMD loads can perform a *splat*, such that only a single lane of the respective shape is loaded, and the result is duplicated to all other lanes.
+SIMD loads can specify a shape that is half the :ref:`bit width <syntax-valtype>` of |V128|. In that case, each lane is loaded at half the size it occupies in the result, and the sign extension mode |sx| specifies how each smaller lane is extended to the larger lane. Alternatively, SIMD loads can perform a *splat*, such that only a single lane of the specified storage size is loaded, and the result is duplicated to all lanes.
 
 The static address offset is added to the dynamic address operand, yielding a 33 bit *effective address* that is the zero-based index at which the memory is accessed.
 All values are read and written in |LittleEndian|_ byte order.

From d61eda88baa0de69fb7e2535fd89f4f1792bc12f Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Mon, 24 Aug 2020 18:20:36 -0700
Subject: [PATCH 19/20] Small reordering of index, remove unused macros

---
 document/core/syntax/instructions.rst | 6 ++----
 document/core/util/macros.def         | 2 --
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index 78d61be45..ac0dbff43 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -173,12 +173,12 @@ Occasionally, it is convenient to group operators together according to the foll
 .. index:: ! simd instruction, fixed-width simd, value, value type
    pair: abstract syntax; instruction
 .. _syntax-laneidx:
-.. _syntax-vunop:
-.. _syntax-vbinop:
 .. _syntax-vsunop:
 .. _syntax-vsbinop:
 .. _syntax-vsternop:
 .. _syntax-vtestop:
+.. _syntax-virelop:
+.. _syntax-vfrelop:
 .. _syntax-vshiftop:
 .. _syntax-viunop:
 .. _syntax-vibinop:
@@ -186,8 +186,6 @@ Occasionally, it is convenient to group operators together according to the foll
 .. _syntax-vsatbinop:
 .. _syntax-vfunop:
 .. _syntax-vfbinop:
-.. _syntax-virelop:
-.. _syntax-vfrelop:
 .. _syntax-instr-simd:
 
 SIMD Instructions
diff --git a/document/core/util/macros.def b/document/core/util/macros.def
index 91353a151..b37f9230b 100644
--- a/document/core/util/macros.def
+++ b/document/core/util/macros.def
@@ -409,8 +409,6 @@
 .. |frelop| mathdef:: \xref{syntax/instructions}{syntax-frelop}{\X{frelop}}
 
 .. |laneidx| mathdef:: \xref{syntax/instructions}{syntax-laneidx}{\X{laneidx}}
-.. |vunop| mathdef:: \xref{syntax/instructions}{syntax-vunop}{\X{vunop}}
-.. |vbinop| mathdef:: \xref{syntax/instructions}{syntax-vbinop}{\X{vbinop}}
 .. |vsunop| mathdef:: \xref{syntax/instructions}{syntax-vsunop}{\X{vsunop}}
 .. |vsbinop| mathdef:: \xref{syntax/instructions}{syntax-vsbinop}{\X{vsbinop}}
 .. |vsternop| mathdef:: \xref{syntax/instructions}{syntax-vsternop}{\X{vsternop}}

From 0a9de17b1ece3a7971c97f1a9bac21c5c0189ef3 Mon Sep 17 00:00:00 2001
From: Ng Zhi An <zhin@google.com>
Date: Tue, 25 Aug 2020 10:17:50 -0700
Subject: [PATCH 20/20] Split instructions into separate lines, remove todo

---
 document/core/syntax/instructions.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/document/core/syntax/instructions.rst b/document/core/syntax/instructions.rst
index ac0dbff43..ad99f9ae3 100644
--- a/document/core/syntax/instructions.rst
+++ b/document/core/syntax/instructions.rst
@@ -208,7 +208,8 @@ SIMD instructions provide basic operations over :ref:`values <syntax-value>` of
      \K{v128.}\vsunop \\&&|&
      \K{v128.}\vsbinop \\&&|&
      \K{v128.}\vsternop \\&&|&
-     \K{i8x16.}\SHUFFLE~\laneidx^{16} ~|~ \K{i8x16.}\SWIZZLE \\&&|&
+     \K{i8x16.}\SHUFFLE~\laneidx^{16} \\&&|&
+     \K{i8x16.}\SWIZZLE \\&&|&
      \X{vxx}\K{.}\SPLAT \\&&|&
      \K{i8x16.}\EXTRACTLANE\K{\_}\sx~\laneidx ~|~
      \K{i16x8.}\EXTRACTLANE\K{\_}\sx~\laneidx \\&&|&
@@ -319,9 +320,6 @@ Operations are performed point-wise on the values of each lane.
 
 Instructions prefixed with :math:`\K{v128}` do not involve a specific interpretation, and treat the |V128| as an |i128| value or a vector of 128 individual bits.
 
-.. todo::
-  write up runtime interpretation for the lane shapes
-
 SIMD instructions can be grouped into several subcategories:
 
 * *Constants*: return a static constant.