Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Arm64] More overloads for *BySelectedScalar methods #33683

Closed
echesakov opened this issue Mar 17, 2020 · 9 comments · Fixed by #36916
Closed

[Arm64] More overloads for *BySelectedScalar methods #33683

echesakov opened this issue Mar 17, 2020 · 9 comments · Fixed by #36916
Assignees
Labels
api-approved API was approved in API review, it can be implemented arch-arm64 area-System.Runtime.Intrinsics
Milestone

Comments

@echesakov
Copy link
Contributor

echesakov commented Mar 17, 2020

Follow-up on #24794 (comment)

namespace System.Runtime.Intrinsics.Arm
{
    public abstract class AdvSimd
    {
        public abstract class Arm64
        {
            /// <summary>
            /// Floating-point fused Multiply-Add to accumulator
            /// For each element result[elem] = addend[elem] + left[elem] * right[rightIndex]
            /// Corresponds to vector forms of FMLA
            /// </summary>
            public static Vector64<float>   FusedMultiplyAddBySelectedScalar(Vector64<float>   addend, Vector64<float>   left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplyAddBySelectedScalar(Vector64<float>   addend, Vector64<float>   left, Vector128<float>  right, byte rightIndex);

            public static Vector128<double> FusedMultiplyAddBySelectedScalar(Vector128<double> addend, Vector128<double> left, Vector128<double> right, byte rightIndex);
            public static Vector128<float>  FusedMultiplyAddBySelectedScalar(Vector128<float>  addend, Vector128<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector128<float>  FusedMultiplyAddBySelectedScalar(Vector128<float>  addend, Vector128<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  FusedMultiplyAddScalarBySelectedScalar(Vector64<double> addend, Vector64<double> left, Vector128<double> right, byte rightIndex);
            public static Vector64<float>   FusedMultiplyAddScalarBySelectedScalar(Vector64<float>  addend, Vector64<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplyAddScalarBySelectedScalar(Vector64<float>  addend, Vector64<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  FusedMultiplyAddScalar(Vector64<double> addend, Vector64<double> left, Vector64<double> right);
            public static Vector128<double> FusedMultiplyAddByScalar(Vector128<double> addend, Vector128<double> left, Vector64<double>  right);

            /// <summary>
            /// Floating-point fused Multiply-Subtract from accumulator
            /// For each element result[elem] = minuend[elem] - left[elem] * right[rightIndex]
            /// Corresponds to vector forms of FMLS
            /// </summary>
            public static Vector64<float>   FusedMultiplySubtractBySelectedScalar(Vector64<float>   minuend, Vector64<float>   left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplySubtractBySelectedScalar(Vector64<float>   minuend, Vector64<float>   left, Vector128<float>  right, byte rightIndex);

            public static Vector128<double> FusedMultiplySubtractBySelectedScalar(Vector128<double> minuend, Vector128<double> left, Vector128<double> right, byte rightIndex);
            public static Vector128<float>  FusedMultiplySubtractBySelectedScalar(Vector128<float>  minuend, Vector128<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector128<float>  FusedMultiplySubtractBySelectedScalar(Vector128<float>  minuend, Vector128<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  FusedMultiplySubtractScalarBySelectedScalar(Vector64<double> minuend, Vector64<double> left, Vector128<double> right, byte rightIndex);
            public static Vector64<float>   FusedMultiplySubtractScalarBySelectedScalar(Vector64<float>  minuend, Vector64<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplySubtractScalarBySelectedScalar(Vector64<float>  minuend, Vector64<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  FusedMultiplySubtractScalar(Vector64<double> minuend, Vector64<double> left, Vector64<double> right);
            public static Vector128<double> FusedMultiplySubtractByScalar(Vector128<double> minuend, Vector128<double> left, Vector64<double>  right);

            /// <summary>
            /// Floating-point Multiply
            /// For each element result[elem] = left[elem] * right[rightIndex]
            /// Corresponds to vector forms of FMUL
            /// </summary>
            public static Vector64<float>   MultiplyBySelectedScalar(Vector64<float>   left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   MultiplyBySelectedScalar(Vector64<float>   left, Vector128<float>  right, byte rightIndex);

            public static Vector128<double> MultiplyBySelectedScalar(Vector128<double> left, Vector128<double> right, byte rightIndex);
            public static Vector128<float>  MultiplyBySelectedScalar(Vector128<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector128<float>  MultiplyBySelectedScalar(Vector128<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  MultiplyScalarBySelectedScalar(Vector64<double> left, Vector128<double> right, byte rightIndex);
            public static Vector64<float>   MultiplyScalarBySelectedScalar(Vector64<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   MultiplyScalarBySelectedScalar(Vector64<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  MultiplyScalar(Vector64<double> left, Vector64<double> right);
            public static Vector128<double> MultiplyByScalar(Vector128<double> left, Vector64<double>  right);

            /// <summary>
            /// Floating-point Multiply extended
            /// For each element result[elem] = left[elem] * right[rightIndex]
            /// Corresponds to vector forms of FMULX
            /// </summary>
         // public static Vector64<float>   MultiplyExtendedBySelectedScalar(Vector64<float>   left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   MultiplyExtendedBySelectedScalar(Vector64<float>   left, Vector128<float>  right, byte rightIndex);

         // public static Vector128<double> MultiplyExtendedBySelectedScalar(Vector128<double> left, Vector128<double> right, byte rightIndex);
            public static Vector128<float>  MultiplyExtendedBySelectedScalar(Vector128<float>  left, Vector64<float>   right, byte rightIndex);
         // public static Vector128<float>  MultiplyExtendedBySelectedScalar(Vector128<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  MultiplyExtendedScalarBySelectedScalar(Vector64<double> left,  Vector128<double> right, byte rightIndex);
            public static Vector64<float>   MultiplyExtendedScalarBySelectedScalar(Vector64<float>  left,   Vector64<float>  right, byte rightIndex);
            public static Vector64<float>   MultiplyExtendedScalarBySelectedScalar(Vector64<float>  left,   Vector128<float> right, byte rightIndex);

            public static Vector64<double>  MultiplyExtendedScalar(Vector64<double> left, Vector64<double> right);
            public static Vector128<double> MultiplyExtendedByScalar(Vector128<double> left, Vector64<double>  right);

            /// <summary>
            /// Multiply-Add to accumulator
            /// For each element result[elem] = addend[elem] + left[elem] * right[rightIndex]
            /// Corresponds to vector forms of MLA
            /// </summary>
         // public static Vector64<byte>    MultiplyAddBySelectedScalar(Vector64<byte>    addend, Vector64<byte>    left, Vector64<byte>    right, byte rightIndex);
         // public static Vector64<sbyte>   MultiplyAddBySelectedScalar(Vector64<sbyte>   addend, Vector64<sbyte>   left, Vector64<sbyte>   right, byte rightIndex);
         // public static Vector64<ushort>  MultiplyAddBySelectedScalar(Vector64<ushort>  addend, Vector64<ushort>  left, Vector64<ushort>  right, byte rightIndex);
         // public static Vector64<short>   MultiplyAddBySelectedScalar(Vector64<short>   addend, Vector64<short>   left, Vector64<short>   right, byte rightIndex);
         // public static Vector64<uint>    MultiplyAddBySelectedScalar(Vector64<uint>    addend, Vector64<uint>    left, Vector64<uint>    right, byte rightIndex);
         // public static Vector64<int>     MultiplyAddBySelectedScalar(Vector64<int>     addend, Vector64<int>     left, Vector64<int>     right, byte rightIndex);

            public static Vector64<byte>    MultiplyAddBySelectedScalar(Vector64<byte>    addend, Vector64<byte>    left, Vector128<byte>   right, byte rightIndex);
            public static Vector64<sbyte>   MultiplyAddBySelectedScalar(Vector64<sbyte>   addend, Vector64<sbyte>   left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector64<ushort>  MultiplyAddBySelectedScalar(Vector64<ushort>  addend, Vector64<ushort>  left, Vector128<ushort> right, byte rightIndex);
            public static Vector64<short>   MultiplyAddBySelectedScalar(Vector64<short>   addend, Vector64<short>   left, Vector128<short>  right, byte rightIndex);
            public static Vector64<uint>    MultiplyAddBySelectedScalar(Vector64<uint>    addend, Vector64<uint>    left, Vector128<uint>   right, byte rightIndex);
            public static Vector64<int>     MultiplyAddBySelectedScalar(Vector64<int>     addend, Vector64<int>     left, Vector128<int>    right, byte rightIndex);

            public static Vector128<byte>   MultiplyAddBySelectedScalar(Vector128<byte>   addend, Vector128<byte>   left, Vector64<byte>    right, byte rightIndex);
            public static Vector128<sbyte>  MultiplyAddBySelectedScalar(Vector128<sbyte>  addend, Vector128<sbyte>  left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector128<ushort> MultiplyAddBySelectedScalar(Vector128<ushort> addend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<short>  MultiplyAddBySelectedScalar(Vector128<short>  addend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyAddBySelectedScalar(Vector128<uint>   addend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);
            public static Vector128<int>    MultiplyAddBySelectedScalar(Vector128<int>    addend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);

         // public static Vector128<byte>   MultiplyAddBySelectedScalar(Vector128<byte>   addend, Vector128<byte>   left, Vector128<byte>   right, byte rightIndex);
         // public static Vector128<sbyte>  MultiplyAddBySelectedScalar(Vector128<sbyte>  addend, Vector128<sbyte>  left, Vector128<sbyte>  right, byte rightIndex);
         // public static Vector128<ushort> MultiplyAddBySelectedScalar(Vector128<ushort> addend, Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
         // public static Vector128<short>  MultiplyAddBySelectedScalar(Vector128<short>  addend, Vector128<short>  left, Vector128<short>  right, byte rightIndex);
         // public static Vector128<uint>   MultiplyAddBySelectedScalar(Vector128<uint>   addend, Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);
         // public static Vector128<int>    MultiplyAddBySelectedScalar(Vector128<int>    addend, Vector128<int>    left, Vector128<int>    right, byte rightIndex);

            /// <summary>
            /// Multiply-Subtract from accumulator
            /// For each element result[elem] = minuend[elem] - left[elem] * right[rightIndex]
            /// Corresponds to vector forms of MLS
            /// </summary>
         // public static Vector64<byte>    MultiplySubtractBySelectedScalar(Vector64<byte>    minuend, Vector64<byte>    left, Vector64<byte>    right, byte rightIndex);
         // public static Vector64<sbyte>   MultiplySubtractBySelectedScalar(Vector64<sbyte>   minuend, Vector64<sbyte>   left, Vector64<sbyte>   right, byte rightIndex);
         // public static Vector64<ushort>  MultiplySubtractBySelectedScalar(Vector64<ushort>  minuend, Vector64<ushort>  left, Vector64<ushort>  right, byte rightIndex);
         // public static Vector64<short>   MultiplySubtractBySelectedScalar(Vector64<short>   minuend, Vector64<short>   left, Vector64<short>   right, byte rightIndex);
         // public static Vector64<uint>    MultiplySubtractBySelectedScalar(Vector64<uint>    minuend, Vector64<uint>    left, Vector64<uint>    right, byte rightIndex);
         // public static Vector64<int>     MultiplySubtractBySelectedScalar(Vector64<int>     minuend, Vector64<int>     left, Vector64<int>     right, byte rightIndex);

            public static Vector64<byte>    MultiplySubtractBySelectedScalar(Vector64<byte>    minuend, Vector64<byte>    left, Vector128<byte>   right, byte rightIndex);
            public static Vector64<sbyte>   MultiplySubtractBySelectedScalar(Vector64<sbyte>   minuend, Vector64<sbyte>   left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector64<ushort>  MultiplySubtractBySelectedScalar(Vector64<ushort>  minuend, Vector64<ushort>  left, Vector128<ushort> right, byte rightIndex);
            public static Vector64<short>   MultiplySubtractBySelectedScalar(Vector64<short>   minuend, Vector64<short>   left, Vector128<short>  right, byte rightIndex);
            public static Vector64<uint>    MultiplySubtractBySelectedScalar(Vector64<uint>    minuend, Vector64<uint>    left, Vector128<uint>   right, byte rightIndex);
            public static Vector64<int>     MultiplySubtractBySelectedScalar(Vector64<int>     minuend, Vector64<int>     left, Vector128<int>    right, byte rightIndex);

            public static Vector128<byte>   MultiplySubtractBySelectedScalar(Vector128<byte>   minuend, Vector128<byte>   left, Vector64<byte>    right, byte rightIndex);
            public static Vector128<sbyte>  MultiplySubtractBySelectedScalar(Vector128<sbyte>  minuend, Vector128<sbyte>  left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector128<ushort> MultiplySubtractBySelectedScalar(Vector128<ushort> minuend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<short>  MultiplySubtractBySelectedScalar(Vector128<short>  minuend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplySubtractBySelectedScalar(Vector128<uint>   minuend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);
            public static Vector128<int>    MultiplySubtractBySelectedScalar(Vector128<int>    minuend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);

         // public static Vector128<byte>   MultiplySubtractBySelectedScalar(Vector128<byte>   minuend, Vector128<byte>   left, Vector128<byte>   right, byte rightIndex);
         // public static Vector128<sbyte>  MultiplySubtractBySelectedScalar(Vector128<sbyte>  minuend, Vector128<sbyte>  left, Vector128<sbyte>  right, byte rightIndex);
         // public static Vector128<ushort> MultiplySubtractBySelectedScalar(Vector128<ushort> minuend, Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
         // public static Vector128<short>  MultiplySubtractBySelectedScalar(Vector128<short>  minuend, Vector128<short>  left, Vector128<short>  right, byte rightIndex);
         // public static Vector128<uint>   MultiplySubtractBySelectedScalar(Vector128<uint>   minuend, Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);
         // public static Vector128<int>    MultiplySubtractBySelectedScalar(Vector128<int>    minuend, Vector128<int>    left, Vector128<int>    right, byte rightIndex);

            /// <summary>
            /// Multiply
            /// For each element result[elem] = left[elem] * right[rightIndex]
            /// Corresponds to vector forms of MUL
            /// </summary>
            public static Vector64<byte>    MultiplyBySelectedScalar(Vector64<byte>    left, Vector64<byte>    right, byte rightIndex);
            public static Vector64<sbyte>   MultiplyBySelectedScalar(Vector64<sbyte>   left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector64<ushort>  MultiplyBySelectedScalar(Vector64<ushort>  left, Vector64<ushort>  right, byte rightIndex);
            public static Vector64<short>   MultiplyBySelectedScalar(Vector64<short>   left, Vector64<short>   right, byte rightIndex);
            public static Vector64<uint>    MultiplyBySelectedScalar(Vector64<uint>    left, Vector64<uint>    right, byte rightIndex);
            public static Vector64<int>     MultiplyBySelectedScalar(Vector64<int>     left, Vector64<int>     right, byte rightIndex);

            public static Vector64<byte>    MultiplyBySelectedScalar(Vector64<byte>    left, Vector128<byte>   right, byte rightIndex);
            public static Vector64<sbyte>   MultiplyBySelectedScalar(Vector64<sbyte>   left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector64<ushort>  MultiplyBySelectedScalar(Vector64<ushort>  left, Vector128<ushort> right, byte rightIndex);
            public static Vector64<short>   MultiplyBySelectedScalar(Vector64<short>   left, Vector128<short>  right, byte rightIndex);
            public static Vector64<uint>    MultiplyBySelectedScalar(Vector64<uint>    left, Vector128<uint>   right, byte rightIndex);
            public static Vector64<int>     MultiplyBySelectedScalar(Vector64<int>     left, Vector128<int>    right, byte rightIndex);

            public static Vector128<byte>   MultiplyBySelectedScalar(Vector128<byte>   left, Vector64<byte>    right, byte rightIndex);
            public static Vector128<sbyte>  MultiplyBySelectedScalar(Vector128<sbyte>  left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector128<ushort> MultiplyBySelectedScalar(Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<short>  MultiplyBySelectedScalar(Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalar(Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);
            public static Vector128<int>    MultiplyBySelectedScalar(Vector128<int>    left, Vector64<int>     right, byte rightIndex);

            public static Vector128<byte>   MultiplyBySelectedScalar(Vector128<byte>   left, Vector128<byte>   right, byte rightIndex);
            public static Vector128<sbyte>  MultiplyBySelectedScalar(Vector128<sbyte>  left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector128<ushort> MultiplyBySelectedScalar(Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<short>  MultiplyBySelectedScalar(Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalar(Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);
            public static Vector128<int>    MultiplyBySelectedScalar(Vector128<int>    left, Vector128<int>    right, byte rightIndex);

            /// <summary>
            /// Signed or Unsigned Multiply-Add Long
            /// For each element result[elem] = addend[elem] + left[elem] * right[rightIndex]
            /// Corresponds to vector forms of SMLAL and UMLAL
            /// </summary>
            public static Vector128<int>    MultiplyWideningLowerAndAdd(Vector128<int>    addend, Vector64<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningLowerAndAdd(Vector128<uint>   addend, Vector64<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningLowerAndAdd(Vector128<long>   addend, Vector64<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningLowerAndAdd(Vector128<ulong>  addend, Vector64<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyWideningLowerAndAdd(Vector128<int>    addend, Vector64<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningLowerAndAdd(Vector128<uint>   addend, Vector64<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningLowerAndAdd(Vector128<long>   addend, Vector64<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningLowerAndAdd(Vector128<ulong>  addend, Vector64<uint>   left, Vector128<uint>   right, byte rightIndex);

            /// <summary>
            /// Signed or Unsigned Multiply-Add Long
            /// For each element result[elem] = addend[elem] + left[elem] * right[rightIndex]
            /// Corresponds to vector forms of SMLAL2 and UMLAL2
            /// </summary>
            // The scalar may be selected from either a 64-bit or a 128-bit right operand,
            // mirroring the two overload groups of MultiplyWideningLowerAndAdd.
            // Overloads selecting the scalar from a 64-bit right operand:
            public static Vector128<int>    MultiplyWideningUpperAndAdd(Vector128<int>    addend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningUpperAndAdd(Vector128<uint>   addend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningUpperAndAdd(Vector128<long>   addend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningUpperAndAdd(Vector128<ulong>  addend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);

            // Overloads selecting the scalar from a 128-bit right operand:
            public static Vector128<int>    MultiplyWideningUpperAndAdd(Vector128<int>    addend, Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningUpperAndAdd(Vector128<uint>   addend, Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningUpperAndAdd(Vector128<long>   addend, Vector128<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningUpperAndAdd(Vector128<ulong>  addend, Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);

            /// <summary>
            /// Signed or Unsigned Multiply-Subtract Long
            /// For each element result[elem] = minuend[elem] - left[elem] * right[rightIndex]
            /// Corresponds to vector forms of SMLSL and UMLSL
            /// </summary>
            // The accumulator is named "minuend" to match the formula above and the
            // other *Subtract* APIs in this proposal (it is subtracted from, not added to).
            // Overloads selecting the scalar from a 64-bit right operand:
            public static Vector128<int>    MultiplyWideningLowerAndSubtract(Vector128<int>    minuend, Vector64<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningLowerAndSubtract(Vector128<uint>   minuend, Vector64<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningLowerAndSubtract(Vector128<long>   minuend, Vector64<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningLowerAndSubtract(Vector128<ulong>  minuend, Vector64<uint>   left, Vector64<uint>    right, byte rightIndex);

            // Overloads selecting the scalar from a 128-bit right operand:
            public static Vector128<int>    MultiplyWideningLowerAndSubtract(Vector128<int>    minuend, Vector64<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningLowerAndSubtract(Vector128<uint>   minuend, Vector64<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningLowerAndSubtract(Vector128<long>   minuend, Vector64<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningLowerAndSubtract(Vector128<ulong>  minuend, Vector64<uint>   left, Vector128<uint>   right, byte rightIndex);

            /// <summary>
            /// Signed or Unsigned Multiply-Subtract Long
            /// For each element result[elem] = minuend[elem] - left[elem] * right[rightIndex]
            /// Corresponds to vector forms of SMLSL2 and UMLSL2
            /// </summary>
            // The accumulator is named "minuend" to match the formula above. The scalar may be
            // selected from either a 64-bit or a 128-bit right operand, mirroring the two
            // overload groups of MultiplyWideningLowerAndSubtract.
            // Overloads selecting the scalar from a 64-bit right operand:
            public static Vector128<int>    MultiplyWideningUpperAndSubtract(Vector128<int>    minuend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningUpperAndSubtract(Vector128<uint>   minuend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningUpperAndSubtract(Vector128<long>   minuend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningUpperAndSubtract(Vector128<ulong>  minuend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);

            // Overloads selecting the scalar from a 128-bit right operand:
            public static Vector128<int>    MultiplyWideningUpperAndSubtract(Vector128<int>    minuend, Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningUpperAndSubtract(Vector128<uint>   minuend, Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningUpperAndSubtract(Vector128<long>   minuend, Vector128<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningUpperAndSubtract(Vector128<ulong>  minuend, Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);

            /// <summary>
            /// Signed or Unsigned Multiply Long
            /// For each element result[elem] = left[elem] * right[rightIndex]
            /// Corresponds to vector forms of SMULL and UMULL
            /// </summary>
            public static Vector128<int>    MultiplyWideningLower(Vector64<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningLower(Vector64<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningLower(Vector64<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningLower(Vector64<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyWideningLower(Vector64<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningLower(Vector64<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningLower(Vector64<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningLower(Vector64<uint>   left, Vector128<uint>   right, byte rightIndex);

            /// <summary>
            /// Signed or Unsigned Multiply Long
            /// For each element result[elem] = left[elem] * right[rightIndex]
            /// Corresponds to vector forms of SMULL2 and UMULL2
            /// </summary>
            // The scalar may be selected from either a 64-bit or a 128-bit right operand,
            // mirroring the two overload groups of MultiplyWideningLower.
            // Overloads selecting the scalar from a 64-bit right operand:
            public static Vector128<int>    MultiplyWideningUpper(Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningUpper(Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningUpper(Vector128<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningUpper(Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);

            // Overloads selecting the scalar from a 128-bit right operand:
            public static Vector128<int>    MultiplyWideningUpper(Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyWideningUpper(Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyWideningUpper(Vector128<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyWideningUpper(Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);
        }
    }
}

cc @terrajobst @tannergooding @TamarChristinaArm

One more question: do we need the rightIndex parameter at all for FusedMultiplyAddBySelectedScalar(Vector128<double> addend, Vector128<double> left, Vector64<double> right, byte rightIndex), since it can only have one value (i.e. 0)?

We might just have FusedMultiplyAddByScalar(Vector128<double> addend, Vector128<double> left, Vector64<double> right) for this one instead? However, this could be confused with FusedMultiplyAddScalar(Vector64<double> acc, Vector64<double> left, Vector64<double> right)...

@echesakov echesakov added api-needs-work API needs work before it is approved, it is NOT ready for implementation arch-arm64 area-System.Runtime.Intrinsics labels Mar 17, 2020
@Dotnet-GitSync-Bot
Copy link
Collaborator

I couldn't add an area label to this Issue.

Checkout this page to find out which area owner to ping, or please add exactly one area label to help train me in the future.

@Dotnet-GitSync-Bot Dotnet-GitSync-Bot added the untriaged New issue has not been triaged by the area owner label Mar 17, 2020
@tannergooding
Copy link
Member

@echesakovMSFT, it looks like there should also be FusedMultiplyAddScalarBySelectedScalar variants and that this should be extended to also include FusedMultiplySubtract at a minimum.

C3.5.17 SIMD by element arithmetic contains a number of other variants we should likely also cover. Do you have any concerns with me updating the proposal to include them?

@echesakov
Copy link
Contributor Author

@echesakovMSFT, it looks like there should also be FusedMultiplyAddScalarBySelectedScalar variants and that this should be extended to also include FusedMultiplySubtract at a minimum.
C3.5.17 SIMD by element arithmetic contains a number of other variants we should likely also cover. Do you have any concerns with me updating the proposal to include them?

Nope, please go ahead.

@TamarChristinaArm
Copy link
Contributor

We might just have FusedMultiplyAddByScalar(Vector128<double> addend, Vector128<double> left, Vector64<double> right) for this one instead? However, this could be confused with FusedMultiplyAddScalar(Vector64<double> acc, Vector64<double> left, Vector64<double> right).

I'm slightly confused... what was the resolution to this? :) I would say that, since the intrinsic is supposed to match the instruction, we should have FusedMultiplyAddBySelectedScalar even though it only takes 0, as that's the instruction. For convenience we already have FusedMultiplyAddScalar, as you said @echesakovMSFT.

@tannergooding
Copy link
Member

I'm slightly confused.. what was the resolution to this? :)

For this one in particular, I think it would be fine to make the signature public static Vector128<double> FusedMultiplyAddBySelectedScalar(Vector128<double> addend, Vector128<double> left, Vector64<double> right); (dropping byte rightIndex). This still exposes the appropriate functionality without allowing the user to pass in an "invalid" index when there is only a single correct input.

Extract and Insert are in a similar boat: #35030 (comment) and #35030 (comment)

@tannergooding
Copy link
Member

Updated to include all the BySelectedScalar APIs minus:

  • SQDMULH
  • SQDMLAL, SQDMLAL2
  • SQDMLSL, SQDMLSL2
  • SQDMULL, SQDMULL2
  • SQRDMLAH
  • SQRDMLSH
  • SQRDMULH

These ones don't have a proposal for the corresponding "regular" overloads yet so I'll add them alongside that.

@tannergooding tannergooding added api-ready-for-review and removed api-needs-work API needs work before it is approved, it is NOT ready for implementation untriaged New issue has not been triaged by the area owner labels Apr 16, 2020
@echesakov
Copy link
Contributor Author

For this one in particular, I think it would be fine to make the signature public static Vector128<double> FusedMultiplyAddBySelectedScalar(Vector128<double> addend, Vector128<double> left, Vector64<double> right); (dropping byte rightIndex). This still exposes the appropriate functionality without allowing the user to pass in an "invalid" index when there is only a single correct input.

What I don't like is that we name the method BySelectedScalar, which to me implies that you can select a scalar; if we drop rightIndex, the name would be even more confusing. That's why I originally proposed to name it
FusedMultiplyAddByScalar(Vector128<double> addend, Vector128<double> left, Vector64<double> right)

I'm slightly confused.. what was the resolution to this? :)

@TamarChristinaArm I don't think we have any conclusion on this

@tannergooding
Copy link
Member

Updated to use ByScalar

@echesakov echesakov added this to the 5.0 milestone May 12, 2020
@terrajobst terrajobst added api-approved API was approved in API review, it can be implemented and removed api-ready-for-review labels May 19, 2020
@terrajobst
Copy link
Member

terrajobst commented May 19, 2020

  • Looks good as proposed
namespace System.Runtime.Intrinsics.Arm
{
    partial class AdvSimd
    {
        // API-review listing of the "by element" overloads: every *BySelectedScalar method
        // multiplies by the single element of `right` chosen by `rightIndex`.
        // The declarations are signatures only (no bodies), as in the API proposal.
        partial class Arm64
        {
            // Floating-point fused multiply-add to accumulator (vector forms of FMLA):
            // for each element, result[elem] = addend[elem] + left[elem] * right[rightIndex].
            public static Vector64<float>   FusedMultiplyAddBySelectedScalar(Vector64<float>   addend, Vector64<float>   left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplyAddBySelectedScalar(Vector64<float>   addend, Vector64<float>   left, Vector128<float>  right, byte rightIndex);

            public static Vector128<double> FusedMultiplyAddBySelectedScalar(Vector128<double> addend, Vector128<double> left, Vector128<double> right, byte rightIndex);
            public static Vector128<float>  FusedMultiplyAddBySelectedScalar(Vector128<float>  addend, Vector128<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector128<float>  FusedMultiplyAddBySelectedScalar(Vector128<float>  addend, Vector128<float>  left, Vector128<float>  right, byte rightIndex);

            // Scalar variants of the fused multiply-add by selected element.
            public static Vector64<double>  FusedMultiplyAddScalarBySelectedScalar(Vector64<double> addend, Vector64<double> left, Vector128<double> right, byte rightIndex);
            public static Vector64<float>   FusedMultiplyAddScalarBySelectedScalar(Vector64<float>  addend, Vector64<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplyAddScalarBySelectedScalar(Vector64<float>  addend, Vector64<float>  left, Vector128<float>  right, byte rightIndex);

            // Vector64<double> holds a single element, so these take no rightIndex and are
            // named *Scalar / *ByScalar instead of *BySelectedScalar.
            public static Vector64<double>  FusedMultiplyAddScalar(Vector64<double> addend, Vector64<double> left, Vector64<double> right);
            public static Vector128<double> FusedMultiplyAddByScalar(Vector128<double> addend, Vector128<double> left, Vector64<double> right);

            // Fused multiply-subtract counterparts of the group above; the accumulator is `minuend`.
            public static Vector64<float>   FusedMultiplySubtractBySelectedScalar(Vector64<float>   minuend, Vector64<float>   left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplySubtractBySelectedScalar(Vector64<float>   minuend, Vector64<float>   left, Vector128<float>  right, byte rightIndex);

            public static Vector128<double> FusedMultiplySubtractBySelectedScalar(Vector128<double> minuend, Vector128<double> left, Vector128<double> right, byte rightIndex);
            public static Vector128<float>  FusedMultiplySubtractBySelectedScalar(Vector128<float>  minuend, Vector128<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector128<float>  FusedMultiplySubtractBySelectedScalar(Vector128<float>  minuend, Vector128<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  FusedMultiplySubtractScalarBySelectedScalar(Vector64<double> minuend, Vector64<double> left, Vector128<double> right, byte rightIndex);
            public static Vector64<float>   FusedMultiplySubtractScalarBySelectedScalar(Vector64<float>  minuend, Vector64<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   FusedMultiplySubtractScalarBySelectedScalar(Vector64<float>  minuend, Vector64<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  FusedMultiplySubtractScalar(Vector64<double> minuend, Vector64<double> left, Vector64<double> right);
            public static Vector128<double> FusedMultiplySubtractByScalar(Vector128<double> minuend, Vector128<double> left, Vector64<double>  right);

            // Floating-point multiply by selected element.
            public static Vector64<float>   MultiplyBySelectedScalar(Vector64<float>   left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   MultiplyBySelectedScalar(Vector64<float>   left, Vector128<float>  right, byte rightIndex);

            public static Vector128<double> MultiplyBySelectedScalar(Vector128<double> left, Vector128<double> right, byte rightIndex);
            public static Vector128<float>  MultiplyBySelectedScalar(Vector128<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector128<float>  MultiplyBySelectedScalar(Vector128<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  MultiplyScalarBySelectedScalar(Vector64<double> left, Vector128<double> right, byte rightIndex);
            public static Vector64<float>   MultiplyScalarBySelectedScalar(Vector64<float>  left, Vector64<float>   right, byte rightIndex);
            public static Vector64<float>   MultiplyScalarBySelectedScalar(Vector64<float>  left, Vector128<float>  right, byte rightIndex);

            public static Vector64<double>  MultiplyScalar(Vector64<double> left, Vector64<double> right);
            public static Vector128<double> MultiplyByScalar(Vector128<double> left, Vector64<double>  right);

            // Floating-point multiply-extended by selected element.
            public static Vector64<float>   MultiplyExtendedBySelectedScalar(Vector64<float>   left, Vector128<float>  right, byte rightIndex);
            public static Vector128<float>  MultiplyExtendedBySelectedScalar(Vector128<float>  left, Vector64<float>   right, byte rightIndex);

            public static Vector64<double>  MultiplyExtendedScalarBySelectedScalar(Vector64<double> left,  Vector128<double> right, byte rightIndex);
            public static Vector64<float>   MultiplyExtendedScalarBySelectedScalar(Vector64<float>  left,   Vector64<float>  right, byte rightIndex);
            public static Vector64<float>   MultiplyExtendedScalarBySelectedScalar(Vector64<float>  left,   Vector128<float> right, byte rightIndex);

            public static Vector64<double>  MultiplyExtendedScalar(Vector64<double> left, Vector64<double> right);
            public static Vector128<double> MultiplyExtendedByScalar(Vector128<double> left, Vector64<double>  right);

            // Integer multiply-add by selected element. Only the mixed-width combinations
            // (64-bit left with 128-bit right, and 128-bit left with 64-bit right) are listed here.
            public static Vector64<byte>    MultiplyAddBySelectedScalar(Vector64<byte>    addend, Vector64<byte>    left, Vector128<byte>   right, byte rightIndex);
            public static Vector64<sbyte>   MultiplyAddBySelectedScalar(Vector64<sbyte>   addend, Vector64<sbyte>   left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector64<ushort>  MultiplyAddBySelectedScalar(Vector64<ushort>  addend, Vector64<ushort>  left, Vector128<ushort> right, byte rightIndex);
            public static Vector64<short>   MultiplyAddBySelectedScalar(Vector64<short>   addend, Vector64<short>   left, Vector128<short>  right, byte rightIndex);
            public static Vector64<uint>    MultiplyAddBySelectedScalar(Vector64<uint>    addend, Vector64<uint>    left, Vector128<uint>   right, byte rightIndex);
            public static Vector64<int>     MultiplyAddBySelectedScalar(Vector64<int>     addend, Vector64<int>     left, Vector128<int>    right, byte rightIndex);

            public static Vector128<byte>   MultiplyAddBySelectedScalar(Vector128<byte>   addend, Vector128<byte>   left, Vector64<byte>    right, byte rightIndex);
            public static Vector128<sbyte>  MultiplyAddBySelectedScalar(Vector128<sbyte>  addend, Vector128<sbyte>  left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector128<ushort> MultiplyAddBySelectedScalar(Vector128<ushort> addend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<short>  MultiplyAddBySelectedScalar(Vector128<short>  addend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyAddBySelectedScalar(Vector128<uint>   addend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);
            public static Vector128<int>    MultiplyAddBySelectedScalar(Vector128<int>    addend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);

            // Integer multiply-subtract by selected element (same width combinations as above).
            public static Vector64<byte>    MultiplySubtractBySelectedScalar(Vector64<byte>    minuend, Vector64<byte>    left, Vector128<byte>   right, byte rightIndex);
            public static Vector64<sbyte>   MultiplySubtractBySelectedScalar(Vector64<sbyte>   minuend, Vector64<sbyte>   left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector64<ushort>  MultiplySubtractBySelectedScalar(Vector64<ushort>  minuend, Vector64<ushort>  left, Vector128<ushort> right, byte rightIndex);
            public static Vector64<short>   MultiplySubtractBySelectedScalar(Vector64<short>   minuend, Vector64<short>   left, Vector128<short>  right, byte rightIndex);
            public static Vector64<uint>    MultiplySubtractBySelectedScalar(Vector64<uint>    minuend, Vector64<uint>    left, Vector128<uint>   right, byte rightIndex);
            public static Vector64<int>     MultiplySubtractBySelectedScalar(Vector64<int>     minuend, Vector64<int>     left, Vector128<int>    right, byte rightIndex);

            public static Vector128<byte>   MultiplySubtractBySelectedScalar(Vector128<byte>   minuend, Vector128<byte>   left, Vector64<byte>    right, byte rightIndex);
            public static Vector128<sbyte>  MultiplySubtractBySelectedScalar(Vector128<sbyte>  minuend, Vector128<sbyte>  left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector128<ushort> MultiplySubtractBySelectedScalar(Vector128<ushort> minuend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<short>  MultiplySubtractBySelectedScalar(Vector128<short>  minuend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplySubtractBySelectedScalar(Vector128<uint>   minuend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);
            public static Vector128<int>    MultiplySubtractBySelectedScalar(Vector128<int>    minuend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);

            // Integer multiply by selected element, for all four left/right width combinations.
            public static Vector64<byte>    MultiplyBySelectedScalar(Vector64<byte>    left, Vector64<byte>    right, byte rightIndex);
            public static Vector64<sbyte>   MultiplyBySelectedScalar(Vector64<sbyte>   left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector64<ushort>  MultiplyBySelectedScalar(Vector64<ushort>  left, Vector64<ushort>  right, byte rightIndex);
            public static Vector64<short>   MultiplyBySelectedScalar(Vector64<short>   left, Vector64<short>   right, byte rightIndex);
            public static Vector64<uint>    MultiplyBySelectedScalar(Vector64<uint>    left, Vector64<uint>    right, byte rightIndex);
            public static Vector64<int>     MultiplyBySelectedScalar(Vector64<int>     left, Vector64<int>     right, byte rightIndex);

            public static Vector64<byte>    MultiplyBySelectedScalar(Vector64<byte>    left, Vector128<byte>   right, byte rightIndex);
            public static Vector64<sbyte>   MultiplyBySelectedScalar(Vector64<sbyte>   left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector64<ushort>  MultiplyBySelectedScalar(Vector64<ushort>  left, Vector128<ushort> right, byte rightIndex);
            public static Vector64<short>   MultiplyBySelectedScalar(Vector64<short>   left, Vector128<short>  right, byte rightIndex);
            public static Vector64<uint>    MultiplyBySelectedScalar(Vector64<uint>    left, Vector128<uint>   right, byte rightIndex);
            public static Vector64<int>     MultiplyBySelectedScalar(Vector64<int>     left, Vector128<int>    right, byte rightIndex);

            public static Vector128<byte>   MultiplyBySelectedScalar(Vector128<byte>   left, Vector64<byte>    right, byte rightIndex);
            public static Vector128<sbyte>  MultiplyBySelectedScalar(Vector128<sbyte>  left, Vector64<sbyte>   right, byte rightIndex);
            public static Vector128<ushort> MultiplyBySelectedScalar(Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<short>  MultiplyBySelectedScalar(Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalar(Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);
            public static Vector128<int>    MultiplyBySelectedScalar(Vector128<int>    left, Vector64<int>     right, byte rightIndex);

            public static Vector128<byte>   MultiplyBySelectedScalar(Vector128<byte>   left, Vector128<byte>   right, byte rightIndex);
            public static Vector128<sbyte>  MultiplyBySelectedScalar(Vector128<sbyte>  left, Vector128<sbyte>  right, byte rightIndex);
            public static Vector128<ushort> MultiplyBySelectedScalar(Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<short>  MultiplyBySelectedScalar(Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalar(Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);
            public static Vector128<int>    MultiplyBySelectedScalar(Vector128<int>    left, Vector128<int>    right, byte rightIndex);

            // Widening multiply by selected element, accumulated into `addend`. The result
            // elements are twice as wide as the inputs (e.g. short * short -> int).
            // "Lower" overloads take a 64-bit `left`; `right` may be 64-bit or 128-bit.
            public static Vector128<int>    MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<int>    addend, Vector64<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<uint>   addend, Vector64<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<long>   addend, Vector64<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<ulong>  addend, Vector64<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<int>    addend, Vector64<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<uint>   addend, Vector64<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<long>   addend, Vector64<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningLowerAndAdd(Vector128<ulong>  addend, Vector64<uint>   left, Vector128<uint>   right, byte rightIndex);

            // "Upper" overloads take a 128-bit `left`. As with "Lower", the first group selects
            // from a 64-bit `right` and the second from a 128-bit `right`; the listing originally
            // repeated the 128-bit group twice, which produced duplicate signatures.
            public static Vector128<int>    MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<int>    addend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<uint>   addend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<long>   addend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<ulong>  addend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<int>    addend, Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<uint>   addend, Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<long>   addend, Vector128<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningUpperAndAdd(Vector128<ulong>  addend, Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);

            // Widening multiply by selected element, subtracted from the accumulator.
            // NOTE(review): the accumulator is named `addend` here, while the other *Subtract
            // APIs in this listing name it `minuend` - confirm the intended parameter name.
            public static Vector128<int>    MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<int>    addend, Vector64<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<uint>   addend, Vector64<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<long>   addend, Vector64<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<ulong>  addend, Vector64<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<int>    addend, Vector64<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<uint>   addend, Vector64<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<long>   addend, Vector64<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningLowerAndSubtract(Vector128<ulong>  addend, Vector64<uint>   left, Vector128<uint>   right, byte rightIndex);

            // First group: 64-bit `right`; second group: 128-bit `right` (duplicate signatures fixed).
            public static Vector128<int>    MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<int>    addend, Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<uint>   addend, Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<long>   addend, Vector128<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<ulong>  addend, Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<int>    addend, Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<uint>   addend, Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<long>   addend, Vector128<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningUpperAndSubtract(Vector128<ulong>  addend, Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);

            // Widening multiply by selected element without accumulation.
            public static Vector128<int>    MultiplyBySelectedScalarWideningLower(Vector64<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningLower(Vector64<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningLower(Vector64<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningLower(Vector64<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyBySelectedScalarWideningLower(Vector64<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningLower(Vector64<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningLower(Vector64<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningLower(Vector64<uint>   left, Vector128<uint>   right, byte rightIndex);

            // First group: 64-bit `right`; second group: 128-bit `right` (duplicate signatures fixed).
            public static Vector128<int>    MultiplyBySelectedScalarWideningUpper(Vector128<short>  left, Vector64<short>   right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningUpper(Vector128<ushort> left, Vector64<ushort>  right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningUpper(Vector128<int>    left, Vector64<int>     right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningUpper(Vector128<uint>   left, Vector64<uint>    right, byte rightIndex);

            public static Vector128<int>    MultiplyBySelectedScalarWideningUpper(Vector128<short>  left, Vector128<short>  right, byte rightIndex);
            public static Vector128<uint>   MultiplyBySelectedScalarWideningUpper(Vector128<ushort> left, Vector128<ushort> right, byte rightIndex);
            public static Vector128<long>   MultiplyBySelectedScalarWideningUpper(Vector128<int>    left, Vector128<int>    right, byte rightIndex);
            public static Vector128<ulong>  MultiplyBySelectedScalarWideningUpper(Vector128<uint>   left, Vector128<uint>   right, byte rightIndex);
        }
    }
}

@echesakov echesakov self-assigned this May 22, 2020
@ghost ghost locked as resolved and limited conversation to collaborators Dec 10, 2020
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
api-approved API was approved in API review, it can be implemented arch-arm64 area-System.Runtime.Intrinsics
Projects
None yet
Development

Successfully merging a pull request may close this issue.

5 participants